diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 186c5fd..9bc531d 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 6.0.1 +current_version = 6.0.2 commit = True tag = True diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1b57e6d..9935c78 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,17 +1,14 @@ on: workflow_dispatch: + release: + types: + - published push: - tags: - - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10 - -name: Creating release - -env: - CIBW_TEST_EXTRAS: test - CIBW_TEST_COMMAND: "pytest {project}/tests" - CIBW_TEST_SKIP: "*_arm64 *_universal2:arm64" - CIBW_ARCHS_MACOS: "x86_64 universal2" + branches: + - main + pull_request: +name: Tests/Release jobs: # Build & test simple source release before wasting hours building and # testing the binary build matrix. @@ -19,31 +16,30 @@ jobs: name: Creating source release runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4.1.7 - name: Setting up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: - python-version: 3.8 + python-version: 3.13 - name: Installing python build dependencies run: | - python -m pip install --upgrade pip - python -m pip install --upgrade setuptools + pip install uv - name: Building source distribution run: | - pip install -e ".[release]" - python setup.py sdist + uv build --sdist - name: Ensuring documentation builds run: | - cd docs && make clean && make html - - - uses: actions/upload-artifact@v2 + cd docs && uv run make clean html + + - uses: actions/upload-artifact@v4.6.0 with: + name: dist-sdist path: dist/*.tar.gz - + build_wheels: needs: [sdist] name: "[${{ strategy.job-index }}/${{ strategy.job-total }}] py${{ matrix.py }} on ${{ matrix.os }}" @@ -51,43 +47,43 @@ jobs: strategy: fail-fast: true matrix: - os: [ubuntu-20.04, windows-2019, macos-11] - # cp - CPython - # pp - PyPy - py: ["cp39", "cp310", "cp311", "cp312", "pp37", "pp38", "pp39"] + os: [ubuntu-latest, windows-latest, macos-14] + py: ["cp39", "cp310", "cp311", "cp312", "cp313"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4.1.7 - - uses: actions/setup-python@v2 - name: Setting up Python + - name: Setting up Python + uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: "3.13" - name: Set up QEMU if: runner.os == 'Linux' - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v3 with: platforms: all - name: Build & test wheels - uses: pypa/cibuildwheel@v2.16.5 + uses: pypa/cibuildwheel@v2.22.0 env: - CIBW_ARCHS_LINUX: auto aarch64 ppc64le CIBW_BUILD: "${{ matrix.py }}-*" - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v4.6.0 with: + name: dist-${{ matrix.os }}-${{ matrix.py }} path: ./wheelhouse/*.whl upload_all: - needs: [build_wheels, sdist] name: Uploading built packages to pypi for release. + needs: [build_wheels, sdist] + if: github.event_name == 'release' runs-on: ubuntu-latest steps: - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v4.1.8 with: - name: artifact + pattern: dist-* + merge-multiple: true path: dist - uses: pypa/gh-action-pypi-publish@v1.4.2 @@ -98,28 +94,28 @@ jobs: build_documentation: name: Building & uploading documentation. needs: [upload_all] + if: github.event_name == 'release' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4.1.7 - name: Setting up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: - python-version: 3.8 + python-version: 3.13 - name: Installing python build dependencies run: | - python -m pip install --upgrade pip - python -m pip install --upgrade setuptools + pip install uv - name: Installing release dependencies. run: | - pip install -e ".[release]" + uv sync - name: Building documentation run: | - cd docs && make clean && make html + cd docs && uv run make clean html - name: Publishing documentation run: | - ghp-import -f -n -c pysimdjson.tkte.ch -p docs/_build/html + uv run ghp-import -f -n -c pysimdjson.tkte.ch -p docs/_build/html diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml deleted file mode 100644 index 70eab9f..0000000 --- a/.github/workflows/test.yml +++ /dev/null @@ -1,83 +0,0 @@ -name: Run tests - -on: [push, pull_request, workflow_dispatch] - -jobs: - # Tests that are platform-agnostic. - universal: - runs-on: ubuntu-latest - steps: - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - - uses: actions/checkout@v2 - - - name: Installing platform dependencies - run: > - sudo apt-get install -y - build-essential - clang - - - name: Installing python dependencies - env: - BUILD_WITH_CYTHON: 1 - BUILD_FOR_DEBUG: 1 - run: | - python -m pip install --upgrade pip wheel - pip install cython setuptools - pip install -e '.[test]' - rm simdjson/csimdjson.cpp - python setup.py develop - - - name: Running tests with pytest - run: | - coverage run -m pytest - - - name: Generating coverage report - run: | - coverage report -m - - - name: Checking flake8 - run: | - flake8 - - build: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] - - steps: - # Python needs to be setup before checkout to prevent files from being - # left in the source tree. See setup-python/issues/106. - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - uses: actions/checkout@v2 - - - name: Installing platform dependencies - if: matrix.os == 'ubuntu-latest' - run: > - sudo apt-get install -y - build-essential - clang - - - name: Installing python dependencies - env: - BUILD_WITH_CYTHON: 1 - run: | - python -m pip install --upgrade pip wheel - pip install cython setuptools - pip install -e '.[test]' - rm simdjson/csimdjson.cpp - python setup.py develop - - - name: Running tests with pytest - run: | - pytest diff --git a/.gitignore b/.gitignore index f81086b..443c39a 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ _build/ # coverage.py default output file .coverage +.idea diff --git a/CHANGELOG.md b/CHANGELOG.md index a9307ec..99b7804 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +## 7.0.0 + +- Drop python 3.8, pypy builds, add python 3.13 (#117, #123) +- Add proper .load/.loads type signatures (#116) +- Updating and modernize github actions, simplify packaging and CI. +- Update upstream simdjson to 3.12.3. +- Add alias to json.JSONEncoder to drop-in API. (#118) +- Deterministic build / static build metadata + ## 6.0.0 - Dropped support for CPython 3.6 - 3.8 which are long past their support window, diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..0e199d8 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,52 @@ +[build-system] +requires = ["setuptools>=74.1", "Cython"] +build-backend = "setuptools.build_meta" + +[project] +name = "pysimdjson" +version = "7.0.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.9" +dependencies = [ + "pysimdjson", +] + +[tool.uv.sources] +pysimdjson = { workspace = true } + +[tool.setuptools] +ext-modules = [ + { name = "csimdjson", sources = ["simdjson/simdjson.cpp", "simdjson/util.cpp", "simdjson/csimdjson.pyx"], py-limited-api = true }, +] + +[tool.setuptools.packages.find] +include = ["simdjson"] + +[dependency-groups] +dev = [ + "build>=1.2.2.post1", + "bumpversion>=0.6.0", + "coverage>=7.6.12", + "furo>=2024.8.6", + "ghp-import>=2.1.0", + "numpy>=2.0.2", + "pytest>=8.3.4", + "pytest-benchmark>=5.1.0", + "sphinx>=7.4.7", +] + + +[tool.cibuildwheel] +before-test = "pip install pytest pytest-benchmark" +test-command = "pytest {project}/tests" +test-skip = "*_arm64 *_universal2:arm64" +# This should be part of ext-modules but is blocked by setuptools issue #4810. +environment = { CPPFLAGS="-DSIMDJSON_IMPLEMENTATION_FALLBACK=1" } + +[tool.cibuildwheel.linux] +archs = ["auto", "aarch64", "ppc64le"] + +[tool.cibuildwheel.macos] +environment = { CXXFLAGS="-std=c++11" } +archs = ["x86_64", "universal2"] \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index ba13530..0000000 --- a/setup.py +++ /dev/null @@ -1,123 +0,0 @@ -import os.path -import platform - -from setuptools import setup, find_packages, Extension - -try: - from Cython.Build import cythonize -except ImportError: - CYTHON_AVAILABLE = False -else: - CYTHON_AVAILABLE = True - - -root = os.path.abspath(os.path.dirname(__file__)) -with open(os.path.join(root, 'README.md'), 'rb') as readme: - long_description = readme.read().decode('utf-8') - -system = platform.system() - -extra_compile_args = [] - -if system == 'Darwin': - extra_compile_args.append('-std=c++11') - -if os.getenv('BUILD_WITH_CYTHON') and not CYTHON_AVAILABLE: - print( - 'BUILD_WITH_CYTHON environment variable is set, but cython' - ' is not available. Falling back to pre-cythonized version if' - ' available.' - ) - -macros = [ - ('SIMDJSON_IMPLEMENTATION_FALLBACK', "1"), -] -if os.getenv('BUILD_WITH_CYTHON') and CYTHON_AVAILABLE: - compiler_directives = { - 'embedsignature': True - } - - if os.getenv('BUILD_FOR_DEBUG'): - # Enable line tracing, which also enables support for coverage - # reporting. - macros.extend([ - ('CYTHON_TRACE', "1"), - ('CYTHON_TRACE_NOGIL', "1") - ]) - compiler_directives['linetrace'] = True - - force = bool(os.getenv('FORCE_REBUILD')) - - extensions = cythonize([ - Extension( - 'csimdjson', - [ - 'simdjson/simdjson.cpp', - 'simdjson/util.cpp', - 'simdjson/csimdjson.pyx' - ], - define_macros=macros, - extra_compile_args=extra_compile_args - ) - ], compiler_directives=compiler_directives, force=force) -else: - extensions = [ - Extension( - 'csimdjson', - [ - 'simdjson/simdjson.cpp', - 'simdjson/util.cpp', - 'simdjson/csimdjson.cpp' - ], - define_macros=macros, - extra_compile_args=extra_compile_args, - language='c++' - ) - ] - -setup( - name='pysimdjson', - packages=find_packages(), - version='6.0.1', - description='simdjson bindings for python', - long_description=long_description, - long_description_content_type='text/markdown', - author='Tyler Kennedy', - author_email='tk@tkte.ch', - url='https://github.com/TkTech/pysimdjson', - keywords=['json', 'simdjson', 'simd'], - zip_safe=False, - classifiers=[ - 'Programming Language :: Python :: 3', - 'License :: OSI Approved :: MIT License', - 'Operating System :: OS Independent', - 'Development Status :: 3 - Alpha', - 'Intended Audience :: Developers', - ], - python_requires='>3.5', - extras_require={ - # Dependencies for package release. - 'release': [ - 'sphinx', - 'furo', - 'ghp-import', - 'bumpversion' - ], - # Dependencies for running tests. - 'test': [ - 'pytest', - 'pytest-benchmark', - 'flake8', - 'coverage', - 'numpy' - ] - }, - ext_modules=extensions, - package_data={ - 'simdjson': [ - 'simdjson/*.pxd', - '__init__.pyi', - 'py.typed' - ] - } -) diff --git a/simdjson/__init__.py b/simdjson/__init__.py index 4e1b14c..b7fafd5 100644 --- a/simdjson/__init__.py +++ b/simdjson/__init__.py @@ -3,18 +3,15 @@ try: from csimdjson import ( - # Objects Parser, Array, Object, - # Constants MAXSIZE_BYTES, PADDING ) except ImportError: raise RuntimeError('Unable to import low-level simdjson bindings.') -# Shuts up *all* linters complaining about our unused imports. _ALL_IMPORTS = [ Parser, Array, @@ -27,12 +24,11 @@ def load(fp, *, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kwargs): """ - Same as the built-in json.load(), with the following exceptions: + Parse the JSON document in the file-like object fp and return the parsed + object. - - All parse_* arguments are currently ignored. simdjson does not - currently provide hooks for these, but it is planned. - - object_pairs_hook is ignored. - - cls is ignored. + All other arguments are ignored, and are provided only for compatibility + with the built-in json module. """ parser = Parser() return parser.parse(fp.read(), True) @@ -41,12 +37,10 @@ def load(fp, *, cls=None, object_hook=None, parse_float=None, parse_int=None, def loads(s, *, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kwargs): """ - Same as the built-in json.loads(), with the following exceptions: + Parse the JSON document s and return the parsed object. - - All parse_* arguments are currently ignored. simdjson does not - currently provide hooks for these, but it is planned. - - object_pairs_hook is ignored. - - cls is ignored. + All other arguments are ignored, and are provided only for compatibility + with the built-in json module. """ parser = Parser() return parser.parse(s, True) @@ -54,3 +48,4 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None, parse_int=None, dumps = json.dumps dump = json.dump +JSONEncoder = json.JSONEncoder diff --git a/simdjson/__init__.pyi b/simdjson/__init__.pyi index d42b480..fa353a7 100644 --- a/simdjson/__init__.pyi +++ b/simdjson/__init__.pyi @@ -130,6 +130,9 @@ class Parser: dumps = json.dumps dump = json.dump +JSONEncoder = json.JSONEncoder +loads = json.loads +load = json.load MAXSIZE_BYTES: Final[int] = ... PADDING: Final[int] = ... diff --git a/simdjson/csimdjson.cpp b/simdjson/csimdjson.cpp deleted file mode 100644 index ceae13c..0000000 --- a/simdjson/csimdjson.cpp +++ /dev/null @@ -1,39174 +0,0 @@ -/* Generated by Cython 3.0.2 */ - -/* BEGIN: Cython Metadata -{ - "distutils": { - "define_macros": [ - [ - "SIMDJSON_IMPLEMENTATION_FALLBACK", - "1" - ] - ], - "depends": [ - "simdjson/simdjson.h", - "simdjson/util.h" - ], - "include_dirs": [ - "simdjson" - ], - "language": "c++", - "name": "csimdjson", - "sources": [ - "simdjson/csimdjson.pyx", - "simdjson/simdjson.cpp", - "simdjson/util.cpp" - ] - }, - "module_name": "csimdjson" -} -END: Cython Metadata */ - -#ifndef PY_SSIZE_T_CLEAN -#define PY_SSIZE_T_CLEAN -#endif /* PY_SSIZE_T_CLEAN */ -#if defined(CYTHON_LIMITED_API) && 0 - #ifndef Py_LIMITED_API - #if CYTHON_LIMITED_API+0 > 0x03030000 - #define Py_LIMITED_API CYTHON_LIMITED_API - #else - #define Py_LIMITED_API 0x03030000 - #endif - #endif -#endif - -#include "Python.h" -#ifndef Py_PYTHON_H - #error Python headers needed to compile C extensions, please install development version of Python. -#elif PY_VERSION_HEX < 0x02070000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000) - #error Cython requires Python 2.7+ or Python 3.3+. -#else -#if CYTHON_LIMITED_API -#define __PYX_EXTRA_ABI_MODULE_NAME "limited" -#else -#define __PYX_EXTRA_ABI_MODULE_NAME "" -#endif -#define CYTHON_ABI "3_0_2" __PYX_EXTRA_ABI_MODULE_NAME -#define __PYX_ABI_MODULE_NAME "_cython_" CYTHON_ABI -#define __PYX_TYPE_MODULE_PREFIX __PYX_ABI_MODULE_NAME "." -#define CYTHON_HEX_VERSION 0x030002F0 -#define CYTHON_FUTURE_DIVISION 1 -#include -#ifndef offsetof - #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) -#endif -#if !defined(_WIN32) && !defined(WIN32) && !defined(MS_WINDOWS) - #ifndef __stdcall - #define __stdcall - #endif - #ifndef __cdecl - #define __cdecl - #endif - #ifndef __fastcall - #define __fastcall - #endif -#endif -#ifndef DL_IMPORT - #define DL_IMPORT(t) t -#endif -#ifndef DL_EXPORT - #define DL_EXPORT(t) t -#endif -#define __PYX_COMMA , -#ifndef HAVE_LONG_LONG - #define HAVE_LONG_LONG -#endif -#ifndef PY_LONG_LONG - #define PY_LONG_LONG LONG_LONG -#endif -#ifndef Py_HUGE_VAL - #define Py_HUGE_VAL HUGE_VAL -#endif -#define __PYX_LIMITED_VERSION_HEX PY_VERSION_HEX -#if defined(GRAALVM_PYTHON) - /* For very preliminary testing purposes. Most variables are set the same as PyPy. - The existence of this section does not imply that anything works or is even tested */ - #define CYTHON_COMPILING_IN_PYPY 0 - #define CYTHON_COMPILING_IN_CPYTHON 0 - #define CYTHON_COMPILING_IN_LIMITED_API 0 - #define CYTHON_COMPILING_IN_GRAAL 1 - #define CYTHON_COMPILING_IN_NOGIL 0 - #undef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 0 - #undef CYTHON_USE_TYPE_SPECS - #define CYTHON_USE_TYPE_SPECS 0 - #undef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 0 - #if PY_VERSION_HEX < 0x03050000 - #undef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 0 - #elif !defined(CYTHON_USE_ASYNC_SLOTS) - #define CYTHON_USE_ASYNC_SLOTS 1 - #endif - #undef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 0 - #undef CYTHON_USE_UNICODE_INTERNALS - #define CYTHON_USE_UNICODE_INTERNALS 0 - #undef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #undef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 0 - #undef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 1 - #undef CYTHON_ASSUME_SAFE_MACROS - #define CYTHON_ASSUME_SAFE_MACROS 0 - #undef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 0 - #undef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 0 - #undef CYTHON_FAST_GIL - #define CYTHON_FAST_GIL 0 - #undef CYTHON_METH_FASTCALL - #define CYTHON_METH_FASTCALL 0 - #undef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 0 - #ifndef CYTHON_PEP487_INIT_SUBCLASS - #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) - #endif - #undef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 1 - #undef CYTHON_USE_MODULE_STATE - #define CYTHON_USE_MODULE_STATE 0 - #undef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE 0 - #undef CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 - #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 - #endif -#elif defined(PYPY_VERSION) - #define CYTHON_COMPILING_IN_PYPY 1 - #define CYTHON_COMPILING_IN_CPYTHON 0 - #define CYTHON_COMPILING_IN_LIMITED_API 0 - #define CYTHON_COMPILING_IN_GRAAL 0 - #define CYTHON_COMPILING_IN_NOGIL 0 - #undef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 0 - #ifndef CYTHON_USE_TYPE_SPECS - #define CYTHON_USE_TYPE_SPECS 0 - #endif - #undef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 0 - #if PY_VERSION_HEX < 0x03050000 - #undef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 0 - #elif !defined(CYTHON_USE_ASYNC_SLOTS) - #define CYTHON_USE_ASYNC_SLOTS 1 - #endif - #undef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 0 - #undef CYTHON_USE_UNICODE_INTERNALS - #define CYTHON_USE_UNICODE_INTERNALS 0 - #undef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #undef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 0 - #undef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 1 - #undef CYTHON_ASSUME_SAFE_MACROS - #define CYTHON_ASSUME_SAFE_MACROS 0 - #undef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 0 - #undef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 0 - #undef CYTHON_FAST_GIL - #define CYTHON_FAST_GIL 0 - #undef CYTHON_METH_FASTCALL - #define CYTHON_METH_FASTCALL 0 - #undef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 0 - #ifndef CYTHON_PEP487_INIT_SUBCLASS - #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) - #endif - #if PY_VERSION_HEX < 0x03090000 - #undef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 0 - #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) - #define CYTHON_PEP489_MULTI_PHASE_INIT 1 - #endif - #undef CYTHON_USE_MODULE_STATE - #define CYTHON_USE_MODULE_STATE 0 - #undef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1 && PYPY_VERSION_NUM >= 0x07030C00) - #undef CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 - #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 - #endif -#elif defined(CYTHON_LIMITED_API) - #ifdef Py_LIMITED_API - #undef __PYX_LIMITED_VERSION_HEX - #define __PYX_LIMITED_VERSION_HEX Py_LIMITED_API - #endif - #define CYTHON_COMPILING_IN_PYPY 0 - #define CYTHON_COMPILING_IN_CPYTHON 0 - #define CYTHON_COMPILING_IN_LIMITED_API 1 - #define CYTHON_COMPILING_IN_GRAAL 0 - #define CYTHON_COMPILING_IN_NOGIL 0 - #undef CYTHON_CLINE_IN_TRACEBACK - #define CYTHON_CLINE_IN_TRACEBACK 0 - #undef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 0 - #undef CYTHON_USE_TYPE_SPECS - #define CYTHON_USE_TYPE_SPECS 1 - #undef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 0 - #undef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 0 - #undef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 0 - #undef CYTHON_USE_UNICODE_INTERNALS - #define CYTHON_USE_UNICODE_INTERNALS 0 - #ifndef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #endif - #undef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 0 - #ifndef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 0 - #endif - #undef CYTHON_ASSUME_SAFE_MACROS - #define CYTHON_ASSUME_SAFE_MACROS 0 - #undef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 0 - #undef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 0 - #undef CYTHON_FAST_GIL - #define CYTHON_FAST_GIL 0 - #undef CYTHON_METH_FASTCALL - #define CYTHON_METH_FASTCALL 0 - #undef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 0 - #ifndef CYTHON_PEP487_INIT_SUBCLASS - #define CYTHON_PEP487_INIT_SUBCLASS 1 - #endif - #undef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 0 - #undef CYTHON_USE_MODULE_STATE - #define CYTHON_USE_MODULE_STATE 1 - #ifndef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE 0 - #endif - #undef CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 - #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 - #endif -#elif defined(PY_NOGIL) - #define CYTHON_COMPILING_IN_PYPY 0 - #define CYTHON_COMPILING_IN_CPYTHON 0 - #define CYTHON_COMPILING_IN_LIMITED_API 0 - #define CYTHON_COMPILING_IN_GRAAL 0 - #define CYTHON_COMPILING_IN_NOGIL 1 - #ifndef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 1 - #endif - #undef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 0 - #ifndef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 1 - #endif - #undef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 0 - #ifndef CYTHON_USE_UNICODE_INTERNALS - #define CYTHON_USE_UNICODE_INTERNALS 1 - #endif - #undef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #undef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 0 - #ifndef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 0 - #endif - #ifndef CYTHON_ASSUME_SAFE_MACROS - #define CYTHON_ASSUME_SAFE_MACROS 1 - #endif - #ifndef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 1 - #endif - #undef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 0 - #undef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 0 - #ifndef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 1 - #endif - #ifndef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE 1 - #endif - #undef CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 -#else - #define CYTHON_COMPILING_IN_PYPY 0 - #define CYTHON_COMPILING_IN_CPYTHON 1 - #define CYTHON_COMPILING_IN_LIMITED_API 0 - #define CYTHON_COMPILING_IN_GRAAL 0 - #define CYTHON_COMPILING_IN_NOGIL 0 - #ifndef CYTHON_USE_TYPE_SLOTS - #define CYTHON_USE_TYPE_SLOTS 1 - #endif - #ifndef CYTHON_USE_TYPE_SPECS - #define CYTHON_USE_TYPE_SPECS 0 - #endif - #ifndef CYTHON_USE_PYTYPE_LOOKUP - #define CYTHON_USE_PYTYPE_LOOKUP 1 - #endif - #if PY_MAJOR_VERSION < 3 - #undef CYTHON_USE_ASYNC_SLOTS - #define CYTHON_USE_ASYNC_SLOTS 0 - #elif !defined(CYTHON_USE_ASYNC_SLOTS) - #define CYTHON_USE_ASYNC_SLOTS 1 - #endif - #ifndef CYTHON_USE_PYLONG_INTERNALS - #define CYTHON_USE_PYLONG_INTERNALS 1 - #endif - #ifndef CYTHON_USE_PYLIST_INTERNALS - #define CYTHON_USE_PYLIST_INTERNALS 1 - #endif - #ifndef CYTHON_USE_UNICODE_INTERNALS - #define CYTHON_USE_UNICODE_INTERNALS 1 - #endif - #if PY_VERSION_HEX < 0x030300F0 || PY_VERSION_HEX >= 0x030B00A2 - #undef CYTHON_USE_UNICODE_WRITER - #define CYTHON_USE_UNICODE_WRITER 0 - #elif !defined(CYTHON_USE_UNICODE_WRITER) - #define CYTHON_USE_UNICODE_WRITER 1 - #endif - #ifndef CYTHON_AVOID_BORROWED_REFS - #define CYTHON_AVOID_BORROWED_REFS 0 - #endif - #ifndef CYTHON_ASSUME_SAFE_MACROS - #define CYTHON_ASSUME_SAFE_MACROS 1 - #endif - #ifndef CYTHON_UNPACK_METHODS - #define CYTHON_UNPACK_METHODS 1 - #endif - #ifndef CYTHON_FAST_THREAD_STATE - #define CYTHON_FAST_THREAD_STATE 1 - #endif - #ifndef CYTHON_FAST_GIL - #define CYTHON_FAST_GIL (PY_MAJOR_VERSION < 3 || PY_VERSION_HEX >= 0x03060000 && PY_VERSION_HEX < 0x030C00A6) - #endif - #ifndef CYTHON_METH_FASTCALL - #define CYTHON_METH_FASTCALL (PY_VERSION_HEX >= 0x030700A1) - #endif - #ifndef CYTHON_FAST_PYCALL - #define CYTHON_FAST_PYCALL 1 - #endif - #ifndef CYTHON_PEP487_INIT_SUBCLASS - #define CYTHON_PEP487_INIT_SUBCLASS 1 - #endif - #if PY_VERSION_HEX < 0x03050000 - #undef CYTHON_PEP489_MULTI_PHASE_INIT - #define CYTHON_PEP489_MULTI_PHASE_INIT 0 - #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) - #define CYTHON_PEP489_MULTI_PHASE_INIT 1 - #endif - #ifndef CYTHON_USE_MODULE_STATE - #define CYTHON_USE_MODULE_STATE 0 - #endif - #if PY_VERSION_HEX < 0x030400a1 - #undef CYTHON_USE_TP_FINALIZE - #define CYTHON_USE_TP_FINALIZE 0 - #elif !defined(CYTHON_USE_TP_FINALIZE) - #define CYTHON_USE_TP_FINALIZE 1 - #endif - #if PY_VERSION_HEX < 0x030600B1 - #undef CYTHON_USE_DICT_VERSIONS - #define CYTHON_USE_DICT_VERSIONS 0 - #elif !defined(CYTHON_USE_DICT_VERSIONS) - #define CYTHON_USE_DICT_VERSIONS (PY_VERSION_HEX < 0x030C00A5) - #endif - #if PY_VERSION_HEX < 0x030700A3 - #undef CYTHON_USE_EXC_INFO_STACK - #define CYTHON_USE_EXC_INFO_STACK 0 - #elif !defined(CYTHON_USE_EXC_INFO_STACK) - #define CYTHON_USE_EXC_INFO_STACK 1 - #endif - #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC - #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 - #endif -#endif -#if !defined(CYTHON_FAST_PYCCALL) -#define CYTHON_FAST_PYCCALL (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1) -#endif -#if !defined(CYTHON_VECTORCALL) -#define CYTHON_VECTORCALL (CYTHON_FAST_PYCCALL && PY_VERSION_HEX >= 0x030800B1) -#endif -#define CYTHON_BACKPORT_VECTORCALL (CYTHON_METH_FASTCALL && PY_VERSION_HEX < 0x030800B1) -#if CYTHON_USE_PYLONG_INTERNALS - #if PY_MAJOR_VERSION < 3 - #include "longintrepr.h" - #endif - #undef SHIFT - #undef BASE - #undef MASK - #ifdef SIZEOF_VOID_P - enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) }; - #endif -#endif -#ifndef __has_attribute - #define __has_attribute(x) 0 -#endif -#ifndef __has_cpp_attribute - #define __has_cpp_attribute(x) 0 -#endif -#ifndef CYTHON_RESTRICT - #if defined(__GNUC__) - #define CYTHON_RESTRICT __restrict__ - #elif defined(_MSC_VER) && _MSC_VER >= 1400 - #define CYTHON_RESTRICT __restrict - #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - #define CYTHON_RESTRICT restrict - #else - #define CYTHON_RESTRICT - #endif -#endif -#ifndef CYTHON_UNUSED - #if defined(__cplusplus) - /* for clang __has_cpp_attribute(maybe_unused) is true even before C++17 - * but leads to warnings with -pedantic, since it is a C++17 feature */ - #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) - #if __has_cpp_attribute(maybe_unused) - #define CYTHON_UNUSED [[maybe_unused]] - #endif - #endif - #endif -#endif -#ifndef CYTHON_UNUSED -# if defined(__GNUC__) -# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) -# define CYTHON_UNUSED __attribute__ ((__unused__)) -# else -# define CYTHON_UNUSED -# endif -# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) -# define CYTHON_UNUSED __attribute__ ((__unused__)) -# else -# define CYTHON_UNUSED -# endif -#endif -#ifndef CYTHON_UNUSED_VAR -# if defined(__cplusplus) - template void CYTHON_UNUSED_VAR( const T& ) { } -# else -# define CYTHON_UNUSED_VAR(x) (void)(x) -# endif -#endif -#ifndef CYTHON_MAYBE_UNUSED_VAR - #define CYTHON_MAYBE_UNUSED_VAR(x) CYTHON_UNUSED_VAR(x) -#endif -#ifndef CYTHON_NCP_UNUSED -# if CYTHON_COMPILING_IN_CPYTHON -# define CYTHON_NCP_UNUSED -# else -# define CYTHON_NCP_UNUSED CYTHON_UNUSED -# endif -#endif -#ifndef CYTHON_USE_CPP_STD_MOVE - #if defined(__cplusplus) && (\ - __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1600)) - #define CYTHON_USE_CPP_STD_MOVE 1 - #else - #define CYTHON_USE_CPP_STD_MOVE 0 - #endif -#endif -#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None) -#ifdef _MSC_VER - #ifndef _MSC_STDINT_H_ - #if _MSC_VER < 1300 - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; - #else - typedef unsigned __int8 uint8_t; - typedef unsigned __int16 uint16_t; - typedef unsigned __int32 uint32_t; - #endif - #endif - #if _MSC_VER < 1300 - #ifdef _WIN64 - typedef unsigned long long __pyx_uintptr_t; - #else - typedef unsigned int __pyx_uintptr_t; - #endif - #else - #ifdef _WIN64 - typedef unsigned __int64 __pyx_uintptr_t; - #else - typedef unsigned __int32 __pyx_uintptr_t; - #endif - #endif -#else - #include - typedef uintptr_t __pyx_uintptr_t; -#endif -#ifndef CYTHON_FALLTHROUGH - #if defined(__cplusplus) - /* for clang __has_cpp_attribute(fallthrough) is true even before C++17 - * but leads to warnings with -pedantic, since it is a C++17 feature */ - #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) - #if __has_cpp_attribute(fallthrough) - #define CYTHON_FALLTHROUGH [[fallthrough]] - #endif - #endif - #ifndef CYTHON_FALLTHROUGH - #if __has_cpp_attribute(clang::fallthrough) - #define CYTHON_FALLTHROUGH [[clang::fallthrough]] - #elif __has_cpp_attribute(gnu::fallthrough) - #define CYTHON_FALLTHROUGH [[gnu::fallthrough]] - #endif - #endif - #endif - #ifndef CYTHON_FALLTHROUGH - #if __has_attribute(fallthrough) - #define CYTHON_FALLTHROUGH __attribute__((fallthrough)) - #else - #define CYTHON_FALLTHROUGH - #endif - #endif - #if defined(__clang__) && defined(__apple_build_version__) - #if __apple_build_version__ < 7000000 - #undef CYTHON_FALLTHROUGH - #define CYTHON_FALLTHROUGH - #endif - #endif -#endif -#ifdef __cplusplus - template - struct __PYX_IS_UNSIGNED_IMPL {static const bool value = T(0) < T(-1);}; - #define __PYX_IS_UNSIGNED(type) (__PYX_IS_UNSIGNED_IMPL::value) -#else - #define __PYX_IS_UNSIGNED(type) (((type)-1) > 0) -#endif -#if CYTHON_COMPILING_IN_PYPY == 1 - #define __PYX_NEED_TP_PRINT_SLOT (PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x030A0000) -#else - #define __PYX_NEED_TP_PRINT_SLOT (PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000) -#endif -#define __PYX_REINTERPRET_FUNCION(func_pointer, other_pointer) ((func_pointer)(void(*)(void))(other_pointer)) - -#ifndef __cplusplus - #error "Cython files generated with the C++ option must be compiled with a C++ compiler." -#endif -#ifndef CYTHON_INLINE - #if defined(__clang__) - #define CYTHON_INLINE __inline__ __attribute__ ((__unused__)) - #else - #define CYTHON_INLINE inline - #endif -#endif -template -void __Pyx_call_destructor(T& x) { - x.~T(); -} -template -class __Pyx_FakeReference { - public: - __Pyx_FakeReference() : ptr(NULL) { } - __Pyx_FakeReference(const T& ref) : ptr(const_cast(&ref)) { } - T *operator->() { return ptr; } - T *operator&() { return ptr; } - operator T&() { return *ptr; } - template bool operator ==(const U& other) const { return *ptr == other; } - template bool operator !=(const U& other) const { return *ptr != other; } - template bool operator==(const __Pyx_FakeReference& other) const { return *ptr == *other.ptr; } - template bool operator!=(const __Pyx_FakeReference& other) const { return *ptr != *other.ptr; } - private: - T *ptr; -}; - -#define __PYX_BUILD_PY_SSIZE_T "n" -#define CYTHON_FORMAT_SSIZE_T "z" -#if PY_MAJOR_VERSION < 3 - #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" - #define __Pyx_DefaultClassType PyClass_Type - #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ - PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) -#else - #define __Pyx_BUILTIN_MODULE_NAME "builtins" - #define __Pyx_DefaultClassType PyType_Type -#if CYTHON_COMPILING_IN_LIMITED_API - static CYTHON_INLINE PyObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, - PyObject *code, PyObject *c, PyObject* n, PyObject *v, - PyObject *fv, PyObject *cell, PyObject* fn, - PyObject *name, int fline, PyObject *lnos) { - PyObject *exception_table = NULL; - PyObject *types_module=NULL, *code_type=NULL, *result=NULL; - PyObject *version_info; // borrowed - PyObject *py_minor_version = NULL; - long minor_version = 0; - PyObject *type, *value, *traceback; - PyErr_Fetch(&type, &value, &traceback); - #if __PYX_LIMITED_VERSION_HEX >= 0x030B0000 - minor_version = 11; // we don't yet need to distinguish between versions > 11 - #else - if (!(version_info = PySys_GetObject("version_info"))) goto end; - if (!(py_minor_version = PySequence_GetItem(version_info, 1))) goto end; - minor_version = PyLong_AsLong(py_minor_version); - if (minor_version == -1 && PyErr_Occurred()) goto end; - #endif - if (!(types_module = PyImport_ImportModule("types"))) goto end; - if (!(code_type = PyObject_GetAttrString(types_module, "CodeType"))) goto end; - if (minor_version <= 7) { - (void)p; - result = PyObject_CallFunction(code_type, "iiiiiOOOOOOiOO", a, k, l, s, f, code, - c, n, v, fn, name, fline, lnos, fv, cell); - } else if (minor_version <= 10) { - result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOiOO", a,p, k, l, s, f, code, - c, n, v, fn, name, fline, lnos, fv, cell); - } else { - if (!(exception_table = PyBytes_FromStringAndSize(NULL, 0))) goto end; - result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOOiOO", a,p, k, l, s, f, code, - c, n, v, fn, name, name, fline, lnos, exception_table, fv, cell); - } - end: - Py_XDECREF(code_type); - Py_XDECREF(exception_table); - Py_XDECREF(types_module); - Py_XDECREF(py_minor_version); - if (type) { - PyErr_Restore(type, value, traceback); - } - return result; - } - #ifndef CO_OPTIMIZED - #define CO_OPTIMIZED 0x0001 - #endif - #ifndef CO_NEWLOCALS - #define CO_NEWLOCALS 0x0002 - #endif - #ifndef CO_VARARGS - #define CO_VARARGS 0x0004 - #endif - #ifndef CO_VARKEYWORDS - #define CO_VARKEYWORDS 0x0008 - #endif - #ifndef CO_ASYNC_GENERATOR - #define CO_ASYNC_GENERATOR 0x0200 - #endif - #ifndef CO_GENERATOR - #define CO_GENERATOR 0x0020 - #endif - #ifndef CO_COROUTINE - #define CO_COROUTINE 0x0080 - #endif -#elif PY_VERSION_HEX >= 0x030B0000 - static CYTHON_INLINE PyCodeObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, - PyObject *code, PyObject *c, PyObject* n, PyObject *v, - PyObject *fv, PyObject *cell, PyObject* fn, - PyObject *name, int fline, PyObject *lnos) { - PyCodeObject *result; - PyObject *empty_bytes = PyBytes_FromStringAndSize("", 0); // we don't have access to __pyx_empty_bytes here - if (!empty_bytes) return NULL; - result = - #if PY_VERSION_HEX >= 0x030C0000 - PyUnstable_Code_NewWithPosOnlyArgs - #else - PyCode_NewWithPosOnlyArgs - #endif - (a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, name, fline, lnos, empty_bytes); - Py_DECREF(empty_bytes); - return result; - } -#elif PY_VERSION_HEX >= 0x030800B2 && !CYTHON_COMPILING_IN_PYPY - #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ - PyCode_NewWithPosOnlyArgs(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) -#else - #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ - PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) -#endif -#endif -#if PY_VERSION_HEX >= 0x030900A4 || defined(Py_IS_TYPE) - #define __Pyx_IS_TYPE(ob, type) Py_IS_TYPE(ob, type) -#else - #define __Pyx_IS_TYPE(ob, type) (((const PyObject*)ob)->ob_type == (type)) -#endif -#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_Is) - #define __Pyx_Py_Is(x, y) Py_Is(x, y) -#else - #define __Pyx_Py_Is(x, y) ((x) == (y)) -#endif -#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsNone) - #define __Pyx_Py_IsNone(ob) Py_IsNone(ob) -#else - #define __Pyx_Py_IsNone(ob) __Pyx_Py_Is((ob), Py_None) -#endif -#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsTrue) - #define __Pyx_Py_IsTrue(ob) Py_IsTrue(ob) -#else - #define __Pyx_Py_IsTrue(ob) __Pyx_Py_Is((ob), Py_True) -#endif -#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsFalse) - #define __Pyx_Py_IsFalse(ob) Py_IsFalse(ob) -#else - #define __Pyx_Py_IsFalse(ob) __Pyx_Py_Is((ob), Py_False) -#endif -#define __Pyx_NoneAsNull(obj) (__Pyx_Py_IsNone(obj) ? NULL : (obj)) -#if PY_VERSION_HEX >= 0x030900F0 && !CYTHON_COMPILING_IN_PYPY - #define __Pyx_PyObject_GC_IsFinalized(o) PyObject_GC_IsFinalized(o) -#else - #define __Pyx_PyObject_GC_IsFinalized(o) _PyGC_FINALIZED(o) -#endif -#ifndef CO_COROUTINE - #define CO_COROUTINE 0x80 -#endif -#ifndef CO_ASYNC_GENERATOR - #define CO_ASYNC_GENERATOR 0x200 -#endif -#ifndef Py_TPFLAGS_CHECKTYPES - #define Py_TPFLAGS_CHECKTYPES 0 -#endif -#ifndef Py_TPFLAGS_HAVE_INDEX - #define Py_TPFLAGS_HAVE_INDEX 0 -#endif -#ifndef Py_TPFLAGS_HAVE_NEWBUFFER - #define Py_TPFLAGS_HAVE_NEWBUFFER 0 -#endif -#ifndef Py_TPFLAGS_HAVE_FINALIZE - #define Py_TPFLAGS_HAVE_FINALIZE 0 -#endif -#ifndef Py_TPFLAGS_SEQUENCE - #define Py_TPFLAGS_SEQUENCE 0 -#endif -#ifndef Py_TPFLAGS_MAPPING - #define Py_TPFLAGS_MAPPING 0 -#endif -#ifndef METH_STACKLESS - #define METH_STACKLESS 0 -#endif -#if PY_VERSION_HEX <= 0x030700A3 || !defined(METH_FASTCALL) - #ifndef METH_FASTCALL - #define METH_FASTCALL 0x80 - #endif - typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject *const *args, Py_ssize_t nargs); - typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject *const *args, - Py_ssize_t nargs, PyObject *kwnames); -#else - #define __Pyx_PyCFunctionFast _PyCFunctionFast - #define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords -#endif -#if CYTHON_METH_FASTCALL - #define __Pyx_METH_FASTCALL METH_FASTCALL - #define __Pyx_PyCFunction_FastCall __Pyx_PyCFunctionFast - #define __Pyx_PyCFunction_FastCallWithKeywords __Pyx_PyCFunctionFastWithKeywords -#else - #define __Pyx_METH_FASTCALL METH_VARARGS - #define __Pyx_PyCFunction_FastCall PyCFunction - #define __Pyx_PyCFunction_FastCallWithKeywords PyCFunctionWithKeywords -#endif -#if CYTHON_VECTORCALL - #define __pyx_vectorcallfunc vectorcallfunc - #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET PY_VECTORCALL_ARGUMENTS_OFFSET - #define __Pyx_PyVectorcall_NARGS(n) PyVectorcall_NARGS((size_t)(n)) -#elif CYTHON_BACKPORT_VECTORCALL - typedef PyObject *(*__pyx_vectorcallfunc)(PyObject *callable, PyObject *const *args, - size_t nargsf, PyObject *kwnames); - #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET ((size_t)1 << (8 * sizeof(size_t) - 1)) - #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(((size_t)(n)) & ~__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET)) -#else - #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET 0 - #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(n)) -#endif -#if __PYX_LIMITED_VERSION_HEX < 0x030900B1 - #define __Pyx_PyType_FromModuleAndSpec(m, s, b) ((void)m, PyType_FromSpecWithBases(s, b)) - typedef PyObject *(*__Pyx_PyCMethod)(PyObject *, PyTypeObject *, PyObject *const *, size_t, PyObject *); -#else - #define __Pyx_PyType_FromModuleAndSpec(m, s, b) PyType_FromModuleAndSpec(m, s, b) - #define __Pyx_PyCMethod PyCMethod -#endif -#ifndef METH_METHOD - #define METH_METHOD 0x200 -#endif -#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc) - #define PyObject_Malloc(s) PyMem_Malloc(s) - #define PyObject_Free(p) PyMem_Free(p) - #define PyObject_Realloc(p) PyMem_Realloc(p) -#endif -#if CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) - #define __Pyx_PyFrame_SetLineNumber(frame, lineno) -#else - #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) - #define __Pyx_PyFrame_SetLineNumber(frame, lineno) (frame)->f_lineno = (lineno) -#endif -#if CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_PyThreadState_Current PyThreadState_Get() -#elif !CYTHON_FAST_THREAD_STATE - #define __Pyx_PyThreadState_Current PyThreadState_GET() -#elif PY_VERSION_HEX >= 0x03060000 - #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet() -#elif PY_VERSION_HEX >= 0x03000000 - #define __Pyx_PyThreadState_Current PyThreadState_GET() -#else - #define __Pyx_PyThreadState_Current _PyThreadState_Current -#endif -#if CYTHON_COMPILING_IN_LIMITED_API -static CYTHON_INLINE void *__Pyx_PyModule_GetState(PyObject *op) -{ - void *result; - result = PyModule_GetState(op); - if (!result) - Py_FatalError("Couldn't find the module state"); - return result; -} -#endif -#define __Pyx_PyObject_GetSlot(obj, name, func_ctype) __Pyx_PyType_GetSlot(Py_TYPE(obj), name, func_ctype) -#if CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((func_ctype) PyType_GetSlot((type), Py_##name)) -#else - #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((type)->name) -#endif -#if PY_VERSION_HEX < 0x030700A2 && !defined(PyThread_tss_create) && !defined(Py_tss_NEEDS_INIT) -#include "pythread.h" -#define Py_tss_NEEDS_INIT 0 -typedef int Py_tss_t; -static CYTHON_INLINE int PyThread_tss_create(Py_tss_t *key) { - *key = PyThread_create_key(); - return 0; -} -static CYTHON_INLINE Py_tss_t * PyThread_tss_alloc(void) { - Py_tss_t *key = (Py_tss_t *)PyObject_Malloc(sizeof(Py_tss_t)); - *key = Py_tss_NEEDS_INIT; - return key; -} -static CYTHON_INLINE void PyThread_tss_free(Py_tss_t *key) { - PyObject_Free(key); -} -static CYTHON_INLINE int PyThread_tss_is_created(Py_tss_t *key) { - return *key != Py_tss_NEEDS_INIT; -} -static CYTHON_INLINE void PyThread_tss_delete(Py_tss_t *key) { - PyThread_delete_key(*key); - *key = Py_tss_NEEDS_INIT; -} -static CYTHON_INLINE int PyThread_tss_set(Py_tss_t *key, void *value) { - return PyThread_set_key_value(*key, value); -} -static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) { - return PyThread_get_key_value(*key); -} -#endif -#if PY_MAJOR_VERSION < 3 - #if CYTHON_COMPILING_IN_PYPY - #if PYPY_VERSION_NUM < 0x07030600 - #if defined(__cplusplus) && __cplusplus >= 201402L - [[deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")]] - #elif defined(__GNUC__) || defined(__clang__) - __attribute__ ((__deprecated__("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6"))) - #elif defined(_MSC_VER) - __declspec(deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")) - #endif - static CYTHON_INLINE int PyGILState_Check(void) { - return 0; - } - #else // PYPY_VERSION_NUM < 0x07030600 - #endif // PYPY_VERSION_NUM < 0x07030600 - #else - static CYTHON_INLINE int PyGILState_Check(void) { - PyThreadState * tstate = _PyThreadState_Current; - return tstate && (tstate == PyGILState_GetThisThreadState()); - } - #endif -#endif -#if CYTHON_COMPILING_IN_CPYTHON || defined(_PyDict_NewPresized) -#define __Pyx_PyDict_NewPresized(n) ((n <= 8) ? PyDict_New() : _PyDict_NewPresized(n)) -#else -#define __Pyx_PyDict_NewPresized(n) PyDict_New() -#endif -#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION - #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) - #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) -#else - #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) - #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX > 0x030600B4 && CYTHON_USE_UNICODE_INTERNALS -#define __Pyx_PyDict_GetItemStrWithError(dict, name) _PyDict_GetItem_KnownHash(dict, name, ((PyASCIIObject *) name)->hash) -static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStr(PyObject *dict, PyObject *name) { - PyObject *res = __Pyx_PyDict_GetItemStrWithError(dict, name); - if (res == NULL) PyErr_Clear(); - return res; -} -#elif PY_MAJOR_VERSION >= 3 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07020000) -#define __Pyx_PyDict_GetItemStrWithError PyDict_GetItemWithError -#define __Pyx_PyDict_GetItemStr PyDict_GetItem -#else -static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStrWithError(PyObject *dict, PyObject *name) { -#if CYTHON_COMPILING_IN_PYPY - return PyDict_GetItem(dict, name); -#else - PyDictEntry *ep; - PyDictObject *mp = (PyDictObject*) dict; - long hash = ((PyStringObject *) name)->ob_shash; - assert(hash != -1); - ep = (mp->ma_lookup)(mp, name, hash); - if (ep == NULL) { - return NULL; - } - return ep->me_value; -#endif -} -#define __Pyx_PyDict_GetItemStr PyDict_GetItem -#endif -#if CYTHON_USE_TYPE_SLOTS - #define __Pyx_PyType_GetFlags(tp) (((PyTypeObject *)tp)->tp_flags) - #define __Pyx_PyType_HasFeature(type, feature) ((__Pyx_PyType_GetFlags(type) & (feature)) != 0) - #define __Pyx_PyObject_GetIterNextFunc(obj) (Py_TYPE(obj)->tp_iternext) -#else - #define __Pyx_PyType_GetFlags(tp) (PyType_GetFlags((PyTypeObject *)tp)) - #define __Pyx_PyType_HasFeature(type, feature) PyType_HasFeature(type, feature) - #define __Pyx_PyObject_GetIterNextFunc(obj) PyIter_Next -#endif -#if CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_SetItemOnTypeDict(tp, k, v) PyObject_GenericSetAttr((PyObject*)tp, k, v) -#else - #define __Pyx_SetItemOnTypeDict(tp, k, v) PyDict_SetItem(tp->tp_dict, k, v) -#endif -#if CYTHON_USE_TYPE_SPECS && PY_VERSION_HEX >= 0x03080000 -#define __Pyx_PyHeapTypeObject_GC_Del(obj) {\ - PyTypeObject *type = Py_TYPE(obj);\ - assert(__Pyx_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE));\ - PyObject_GC_Del(obj);\ - Py_DECREF(type);\ -} -#else -#define __Pyx_PyHeapTypeObject_GC_Del(obj) PyObject_GC_Del(obj) -#endif -#if CYTHON_COMPILING_IN_LIMITED_API - #define CYTHON_PEP393_ENABLED 1 - #define __Pyx_PyUnicode_READY(op) (0) - #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GetLength(u) - #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_ReadChar(u, i) - #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((void)u, 1114111U) - #define __Pyx_PyUnicode_KIND(u) ((void)u, (0)) - #define __Pyx_PyUnicode_DATA(u) ((void*)u) - #define __Pyx_PyUnicode_READ(k, d, i) ((void)k, PyUnicode_ReadChar((PyObject*)(d), i)) - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GetLength(u)) -#elif PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) - #define CYTHON_PEP393_ENABLED 1 - #if PY_VERSION_HEX >= 0x030C0000 - #define __Pyx_PyUnicode_READY(op) (0) - #else - #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ - 0 : _PyUnicode_Ready((PyObject *)(op))) - #endif - #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) - #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) - #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u) - #define __Pyx_PyUnicode_KIND(u) ((int)PyUnicode_KIND(u)) - #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) - #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) - #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, (Py_UCS4) ch) - #if PY_VERSION_HEX >= 0x030C0000 - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u)) - #else - #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000 - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length)) - #else - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) - #endif - #endif -#else - #define CYTHON_PEP393_ENABLED 0 - #define PyUnicode_1BYTE_KIND 1 - #define PyUnicode_2BYTE_KIND 2 - #define PyUnicode_4BYTE_KIND 4 - #define __Pyx_PyUnicode_READY(op) (0) - #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) - #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) - #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((sizeof(Py_UNICODE) == 2) ? 65535U : 1114111U) - #define __Pyx_PyUnicode_KIND(u) ((int)sizeof(Py_UNICODE)) - #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) - #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) - #define __Pyx_PyUnicode_WRITE(k, d, i, ch) (((void)(k)), ((Py_UNICODE*)d)[i] = (Py_UNICODE) ch) - #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u)) -#endif -#if CYTHON_COMPILING_IN_PYPY - #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) - #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) -#else - #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) - #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\ - PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) -#endif -#if CYTHON_COMPILING_IN_PYPY - #if !defined(PyUnicode_DecodeUnicodeEscape) - #define PyUnicode_DecodeUnicodeEscape(s, size, errors) PyUnicode_Decode(s, size, "unicode_escape", errors) - #endif - #if !defined(PyUnicode_Contains) || (PY_MAJOR_VERSION == 2 && PYPY_VERSION_NUM < 0x07030500) - #undef PyUnicode_Contains - #define PyUnicode_Contains(u, s) PySequence_Contains(u, s) - #endif - #if !defined(PyByteArray_Check) - #define PyByteArray_Check(obj) PyObject_TypeCheck(obj, &PyByteArray_Type) - #endif - #if !defined(PyObject_Format) - #define PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt) - #endif -#endif -#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyString_Check(b) && !PyString_CheckExact(b)))) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) -#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyUnicode_Check(b) && !PyUnicode_CheckExact(b)))) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) -#if PY_MAJOR_VERSION >= 3 - #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) -#else - #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) -#endif -#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII) - #define PyObject_ASCII(o) PyObject_Repr(o) -#endif -#if PY_MAJOR_VERSION >= 3 - #define PyBaseString_Type PyUnicode_Type - #define PyStringObject PyUnicodeObject - #define PyString_Type PyUnicode_Type - #define PyString_Check PyUnicode_Check - #define PyString_CheckExact PyUnicode_CheckExact -#ifndef PyObject_Unicode - #define PyObject_Unicode PyObject_Str -#endif -#endif -#if PY_MAJOR_VERSION >= 3 - #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) - #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) -#else - #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj)) - #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) -#endif -#if CYTHON_COMPILING_IN_CPYTHON - #define __Pyx_PySequence_ListKeepNew(obj)\ - (likely(PyList_CheckExact(obj) && Py_REFCNT(obj) == 1) ? __Pyx_NewRef(obj) : PySequence_List(obj)) -#else - #define __Pyx_PySequence_ListKeepNew(obj) PySequence_List(obj) -#endif -#ifndef PySet_CheckExact - #define PySet_CheckExact(obj) __Pyx_IS_TYPE(obj, &PySet_Type) -#endif -#if PY_VERSION_HEX >= 0x030900A4 - #define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt) - #define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size) -#else - #define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt) - #define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size) -#endif -#if CYTHON_ASSUME_SAFE_MACROS - #define __Pyx_PySequence_ITEM(o, i) PySequence_ITEM(o, i) - #define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq) - #define __Pyx_PyTuple_SET_ITEM(o, i, v) (PyTuple_SET_ITEM(o, i, v), (0)) - #define __Pyx_PyList_SET_ITEM(o, i, v) (PyList_SET_ITEM(o, i, v), (0)) - #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_GET_SIZE(o) - #define __Pyx_PyList_GET_SIZE(o) PyList_GET_SIZE(o) - #define __Pyx_PySet_GET_SIZE(o) PySet_GET_SIZE(o) - #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_GET_SIZE(o) - #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_GET_SIZE(o) -#else - #define __Pyx_PySequence_ITEM(o, i) PySequence_GetItem(o, i) - #define __Pyx_PySequence_SIZE(seq) PySequence_Size(seq) - #define __Pyx_PyTuple_SET_ITEM(o, i, v) PyTuple_SetItem(o, i, v) - #define __Pyx_PyList_SET_ITEM(o, i, v) PyList_SetItem(o, i, v) - #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_Size(o) - #define __Pyx_PyList_GET_SIZE(o) PyList_Size(o) - #define __Pyx_PySet_GET_SIZE(o) PySet_Size(o) - #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_Size(o) - #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_Size(o) -#endif -#if PY_MAJOR_VERSION >= 3 - #define PyIntObject PyLongObject - #define PyInt_Type PyLong_Type - #define PyInt_Check(op) PyLong_Check(op) - #define PyInt_CheckExact(op) PyLong_CheckExact(op) - #define __Pyx_Py3Int_Check(op) PyLong_Check(op) - #define __Pyx_Py3Int_CheckExact(op) PyLong_CheckExact(op) - #define PyInt_FromString PyLong_FromString - #define PyInt_FromUnicode PyLong_FromUnicode - #define PyInt_FromLong PyLong_FromLong - #define PyInt_FromSize_t PyLong_FromSize_t - #define PyInt_FromSsize_t PyLong_FromSsize_t - #define PyInt_AsLong PyLong_AsLong - #define PyInt_AS_LONG PyLong_AS_LONG - #define PyInt_AsSsize_t PyLong_AsSsize_t - #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask - #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask - #define PyNumber_Int PyNumber_Long -#else - #define __Pyx_Py3Int_Check(op) (PyLong_Check(op) || PyInt_Check(op)) - #define __Pyx_Py3Int_CheckExact(op) (PyLong_CheckExact(op) || PyInt_CheckExact(op)) -#endif -#if PY_MAJOR_VERSION >= 3 - #define PyBoolObject PyLongObject -#endif -#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY - #ifndef PyUnicode_InternFromString - #define PyUnicode_InternFromString(s) PyUnicode_FromString(s) - #endif -#endif -#if PY_VERSION_HEX < 0x030200A4 - typedef long Py_hash_t; - #define __Pyx_PyInt_FromHash_t PyInt_FromLong - #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsHash_t -#else - #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t - #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsSsize_t -#endif -#if CYTHON_USE_ASYNC_SLOTS - #if PY_VERSION_HEX >= 0x030500B1 - #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods - #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async) - #else - #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) - #endif -#else - #define __Pyx_PyType_AsAsync(obj) NULL -#endif -#ifndef __Pyx_PyAsyncMethodsStruct - typedef struct { - unaryfunc am_await; - unaryfunc am_aiter; - unaryfunc am_anext; - } __Pyx_PyAsyncMethodsStruct; -#endif - -#if defined(_WIN32) || defined(WIN32) || defined(MS_WINDOWS) - #if !defined(_USE_MATH_DEFINES) - #define _USE_MATH_DEFINES - #endif -#endif -#include -#ifdef NAN -#define __PYX_NAN() ((float) NAN) -#else -static CYTHON_INLINE float __PYX_NAN() { - float value; - memset(&value, 0xFF, sizeof(value)); - return value; -} -#endif -#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL) -#define __Pyx_truncl trunc -#else -#define __Pyx_truncl truncl -#endif - -#define __PYX_MARK_ERR_POS(f_index, lineno) \ - { __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; } -#define __PYX_ERR(f_index, lineno, Ln_error) \ - { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; } - -#ifdef CYTHON_EXTERN_C - #undef __PYX_EXTERN_C - #define __PYX_EXTERN_C CYTHON_EXTERN_C -#elif defined(__PYX_EXTERN_C) - #ifdef _MSC_VER - #pragma message ("Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.") - #else - #warning Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead. - #endif -#else - #define __PYX_EXTERN_C extern "C++" -#endif - -#define __PYX_HAVE__csimdjson -#define __PYX_HAVE_API__csimdjson -/* Early includes */ -#include "ios" -#include "new" -#include "stdexcept" -#include "typeinfo" -#include -#include -#include -#include -#include -#include "util.h" -#include "simdjson.h" -#include "pythread.h" -#include -#ifdef _OPENMP -#include -#endif /* _OPENMP */ - -#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS) -#define CYTHON_WITHOUT_ASSERTIONS -#endif - -typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding; - const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; - -#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0 -#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 1 -#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT (PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8) -#define __PYX_DEFAULT_STRING_ENCODING "utf8" -#define __Pyx_PyObject_FromString __Pyx_PyUnicode_FromString -#define __Pyx_PyObject_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize -#define __Pyx_uchar_cast(c) ((unsigned char)c) -#define __Pyx_long_cast(x) ((long)x) -#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\ - (sizeof(type) < sizeof(Py_ssize_t)) ||\ - (sizeof(type) > sizeof(Py_ssize_t) &&\ - likely(v < (type)PY_SSIZE_T_MAX ||\ - v == (type)PY_SSIZE_T_MAX) &&\ - (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\ - v == (type)PY_SSIZE_T_MIN))) ||\ - (sizeof(type) == sizeof(Py_ssize_t) &&\ - (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\ - v == (type)PY_SSIZE_T_MAX))) ) -static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) { - return (size_t) i < (size_t) limit; -} -#if defined (__cplusplus) && __cplusplus >= 201103L - #include - #define __Pyx_sst_abs(value) std::abs(value) -#elif SIZEOF_INT >= SIZEOF_SIZE_T - #define __Pyx_sst_abs(value) abs(value) -#elif SIZEOF_LONG >= SIZEOF_SIZE_T - #define __Pyx_sst_abs(value) labs(value) -#elif defined (_MSC_VER) - #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value)) -#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - #define __Pyx_sst_abs(value) llabs(value) -#elif defined (__GNUC__) - #define __Pyx_sst_abs(value) __builtin_llabs(value) -#else - #define __Pyx_sst_abs(value) ((value<0) ? -value : value) -#endif -static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*); -static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); -#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s)) -#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) -#define __Pyx_PyBytes_FromString PyBytes_FromString -#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize -static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); -#if PY_MAJOR_VERSION < 3 - #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString - #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize -#else - #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString - #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize -#endif -#define __Pyx_PyBytes_AsWritableString(s) ((char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsWritableSString(s) ((signed char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsWritableUString(s) ((unsigned char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsString(s) ((const char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsSString(s) ((const signed char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyBytes_AsUString(s) ((const unsigned char*) PyBytes_AS_STRING(s)) -#define __Pyx_PyObject_AsWritableString(s) ((char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_AsWritableSString(s) ((signed char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_AsWritableUString(s) ((unsigned char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_AsSString(s) ((const signed char*) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_AsUString(s) ((const unsigned char*) __Pyx_PyObject_AsString(s)) -#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) -#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) -#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) -#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) -#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) -#if CYTHON_COMPILING_IN_LIMITED_API -static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const wchar_t *u) -{ - const wchar_t *u_end = u; - while (*u_end++) ; - return (size_t)(u_end - u - 1); -} -#else -static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) -{ - const Py_UNICODE *u_end = u; - while (*u_end++) ; - return (size_t)(u_end - u - 1); -} -#endif -#define __Pyx_PyUnicode_FromOrdinal(o) PyUnicode_FromOrdinal((int)o) -#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u)) -#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode -#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode -#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj) -#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None) -static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b); -static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); -static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject*); -static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x); -#define __Pyx_PySequence_Tuple(obj)\ - (likely(PyTuple_CheckExact(obj)) ? __Pyx_NewRef(obj) : PySequence_Tuple(obj)) -static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); -static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); -static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject*); -#if CYTHON_ASSUME_SAFE_MACROS -#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) -#else -#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) -#endif -#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) -#if PY_MAJOR_VERSION >= 3 -#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x)) -#else -#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Int(x)) -#endif -#if CYTHON_USE_PYLONG_INTERNALS - #if PY_VERSION_HEX >= 0x030C00A7 - #ifndef _PyLong_SIGN_MASK - #define _PyLong_SIGN_MASK 3 - #endif - #ifndef _PyLong_NON_SIZE_BITS - #define _PyLong_NON_SIZE_BITS 3 - #endif - #define __Pyx_PyLong_Sign(x) (((PyLongObject*)x)->long_value.lv_tag & _PyLong_SIGN_MASK) - #define __Pyx_PyLong_IsNeg(x) ((__Pyx_PyLong_Sign(x) & 2) != 0) - #define __Pyx_PyLong_IsNonNeg(x) (!__Pyx_PyLong_IsNeg(x)) - #define __Pyx_PyLong_IsZero(x) (__Pyx_PyLong_Sign(x) & 1) - #define __Pyx_PyLong_IsPos(x) (__Pyx_PyLong_Sign(x) == 0) - #define __Pyx_PyLong_CompactValueUnsigned(x) (__Pyx_PyLong_Digits(x)[0]) - #define __Pyx_PyLong_DigitCount(x) ((Py_ssize_t) (((PyLongObject*)x)->long_value.lv_tag >> _PyLong_NON_SIZE_BITS)) - #define __Pyx_PyLong_SignedDigitCount(x)\ - ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * __Pyx_PyLong_DigitCount(x)) - #if defined(PyUnstable_Long_IsCompact) && defined(PyUnstable_Long_CompactValue) - #define __Pyx_PyLong_IsCompact(x) PyUnstable_Long_IsCompact((PyLongObject*) x) - #define __Pyx_PyLong_CompactValue(x) PyUnstable_Long_CompactValue((PyLongObject*) x) - #else - #define __Pyx_PyLong_IsCompact(x) (((PyLongObject*)x)->long_value.lv_tag < (2 << _PyLong_NON_SIZE_BITS)) - #define __Pyx_PyLong_CompactValue(x) ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * (Py_ssize_t) __Pyx_PyLong_Digits(x)[0]) - #endif - typedef Py_ssize_t __Pyx_compact_pylong; - typedef size_t __Pyx_compact_upylong; - #else // Py < 3.12 - #define __Pyx_PyLong_IsNeg(x) (Py_SIZE(x) < 0) - #define __Pyx_PyLong_IsNonNeg(x) (Py_SIZE(x) >= 0) - #define __Pyx_PyLong_IsZero(x) (Py_SIZE(x) == 0) - #define __Pyx_PyLong_IsPos(x) (Py_SIZE(x) > 0) - #define __Pyx_PyLong_CompactValueUnsigned(x) ((Py_SIZE(x) == 0) ? 0 : __Pyx_PyLong_Digits(x)[0]) - #define __Pyx_PyLong_DigitCount(x) __Pyx_sst_abs(Py_SIZE(x)) - #define __Pyx_PyLong_SignedDigitCount(x) Py_SIZE(x) - #define __Pyx_PyLong_IsCompact(x) (Py_SIZE(x) == 0 || Py_SIZE(x) == 1 || Py_SIZE(x) == -1) - #define __Pyx_PyLong_CompactValue(x)\ - ((Py_SIZE(x) == 0) ? (sdigit) 0 : ((Py_SIZE(x) < 0) ? -(sdigit)__Pyx_PyLong_Digits(x)[0] : (sdigit)__Pyx_PyLong_Digits(x)[0])) - typedef sdigit __Pyx_compact_pylong; - typedef digit __Pyx_compact_upylong; - #endif - #if PY_VERSION_HEX >= 0x030C00A5 - #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->long_value.ob_digit) - #else - #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->ob_digit) - #endif -#endif -#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII -static int __Pyx_sys_getdefaultencoding_not_ascii; -static int __Pyx_init_sys_getdefaultencoding_params(void) { - PyObject* sys; - PyObject* default_encoding = NULL; - PyObject* ascii_chars_u = NULL; - PyObject* ascii_chars_b = NULL; - const char* default_encoding_c; - sys = PyImport_ImportModule("sys"); - if (!sys) goto bad; - default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL); - Py_DECREF(sys); - if (!default_encoding) goto bad; - default_encoding_c = PyBytes_AsString(default_encoding); - if (!default_encoding_c) goto bad; - if (strcmp(default_encoding_c, "ascii") == 0) { - __Pyx_sys_getdefaultencoding_not_ascii = 0; - } else { - char ascii_chars[128]; - int c; - for (c = 0; c < 128; c++) { - ascii_chars[c] = (char) c; - } - __Pyx_sys_getdefaultencoding_not_ascii = 1; - ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); - if (!ascii_chars_u) goto bad; - ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); - if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { - PyErr_Format( - PyExc_ValueError, - "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.", - default_encoding_c); - goto bad; - } - Py_DECREF(ascii_chars_u); - Py_DECREF(ascii_chars_b); - } - Py_DECREF(default_encoding); - return 0; -bad: - Py_XDECREF(default_encoding); - Py_XDECREF(ascii_chars_u); - Py_XDECREF(ascii_chars_b); - return -1; -} -#endif -#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 -#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) -#else -#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL) -#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT -static char* __PYX_DEFAULT_STRING_ENCODING; -static int __Pyx_init_sys_getdefaultencoding_params(void) { - PyObject* sys; - PyObject* default_encoding = NULL; - char* default_encoding_c; - sys = PyImport_ImportModule("sys"); - if (!sys) goto bad; - default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); - Py_DECREF(sys); - if (!default_encoding) goto bad; - default_encoding_c = PyBytes_AsString(default_encoding); - if (!default_encoding_c) goto bad; - __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1); - if (!__PYX_DEFAULT_STRING_ENCODING) goto bad; - strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c); - Py_DECREF(default_encoding); - return 0; -bad: - Py_XDECREF(default_encoding); - return -1; -} -#endif -#endif - - -/* Test for GCC > 2.95 */ -#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))) - #define likely(x) __builtin_expect(!!(x), 1) - #define unlikely(x) __builtin_expect(!!(x), 0) -#else /* !__GNUC__ or GCC < 2.95 */ - #define likely(x) (x) - #define unlikely(x) (x) -#endif /* __GNUC__ */ -static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; } - -#if !CYTHON_USE_MODULE_STATE -static PyObject *__pyx_m = NULL; -#endif -static int __pyx_lineno; -static int __pyx_clineno = 0; -static const char * __pyx_cfilenm = __FILE__; -static const char *__pyx_filename; - -/* #### Code section: filename_table ### */ - -static const char *__pyx_f[] = { - "simdjson/csimdjson.pyx", - "", - "venv/lib/python3.11/site-packages/Cython/Includes/cpython/type.pxd", -}; -/* #### Code section: utility_code_proto_before_types ### */ -/* ForceInitThreads.proto */ -#ifndef __PYX_FORCE_INIT_THREADS - #define __PYX_FORCE_INIT_THREADS 0 -#endif - -/* NoFastGil.proto */ -#define __Pyx_PyGILState_Ensure PyGILState_Ensure -#define __Pyx_PyGILState_Release PyGILState_Release -#define __Pyx_FastGIL_Remember() -#define __Pyx_FastGIL_Forget() -#define __Pyx_FastGilFuncInit() - -/* BufferFormatStructs.proto */ -struct __Pyx_StructField_; -#define __PYX_BUF_FLAGS_PACKED_STRUCT (1 << 0) -typedef struct { - const char* name; - struct __Pyx_StructField_* fields; - size_t size; - size_t arraysize[8]; - int ndim; - char typegroup; - char is_unsigned; - int flags; -} __Pyx_TypeInfo; -typedef struct __Pyx_StructField_ { - __Pyx_TypeInfo* type; - const char* name; - size_t offset; -} __Pyx_StructField; -typedef struct { - __Pyx_StructField* field; - size_t parent_offset; -} __Pyx_BufFmt_StackElem; -typedef struct { - __Pyx_StructField root; - __Pyx_BufFmt_StackElem* head; - size_t fmt_offset; - size_t new_count, enc_count; - size_t struct_alignment; - int is_complex; - char enc_type; - char new_packmode; - char enc_packmode; - char is_valid_array; -} __Pyx_BufFmt_Context; - -/* Atomics.proto */ -#include -#ifndef CYTHON_ATOMICS - #define CYTHON_ATOMICS 1 -#endif -#define __PYX_CYTHON_ATOMICS_ENABLED() CYTHON_ATOMICS -#define __pyx_atomic_int_type int -#define __pyx_nonatomic_int_type int -#if CYTHON_ATOMICS && (defined(__STDC_VERSION__) &&\ - (__STDC_VERSION__ >= 201112L) &&\ - !defined(__STDC_NO_ATOMICS__)) - #include -#elif CYTHON_ATOMICS && (defined(__cplusplus) && (\ - (__cplusplus >= 201103L) ||\ - (defined(_MSC_VER) && _MSC_VER >= 1700))) - #include -#endif -#if CYTHON_ATOMICS && (defined(__STDC_VERSION__) &&\ - (__STDC_VERSION__ >= 201112L) &&\ - !defined(__STDC_NO_ATOMICS__) &&\ - ATOMIC_INT_LOCK_FREE == 2) - #undef __pyx_atomic_int_type - #define __pyx_atomic_int_type atomic_int - #define __pyx_atomic_incr_aligned(value) atomic_fetch_add_explicit(value, 1, memory_order_relaxed) - #define __pyx_atomic_decr_aligned(value) atomic_fetch_sub_explicit(value, 1, memory_order_acq_rel) - #if defined(__PYX_DEBUG_ATOMICS) && defined(_MSC_VER) - #pragma message ("Using standard C atomics") - #elif defined(__PYX_DEBUG_ATOMICS) - #warning "Using standard C atomics" - #endif -#elif CYTHON_ATOMICS && (defined(__cplusplus) && (\ - (__cplusplus >= 201103L) ||\ -\ - (defined(_MSC_VER) && _MSC_VER >= 1700)) &&\ - ATOMIC_INT_LOCK_FREE == 2) - #undef __pyx_atomic_int_type - #define __pyx_atomic_int_type std::atomic_int - #define __pyx_atomic_incr_aligned(value) std::atomic_fetch_add_explicit(value, 1, std::memory_order_relaxed) - #define __pyx_atomic_decr_aligned(value) std::atomic_fetch_sub_explicit(value, 1, std::memory_order_acq_rel) - #if defined(__PYX_DEBUG_ATOMICS) && defined(_MSC_VER) - #pragma message ("Using standard C++ atomics") - #elif defined(__PYX_DEBUG_ATOMICS) - #warning "Using standard C++ atomics" - #endif -#elif CYTHON_ATOMICS && (__GNUC__ >= 5 || (__GNUC__ == 4 &&\ - (__GNUC_MINOR__ > 1 ||\ - (__GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ >= 2)))) - #define __pyx_atomic_incr_aligned(value) __sync_fetch_and_add(value, 1) - #define __pyx_atomic_decr_aligned(value) __sync_fetch_and_sub(value, 1) - #ifdef __PYX_DEBUG_ATOMICS - #warning "Using GNU atomics" - #endif -#elif CYTHON_ATOMICS && defined(_MSC_VER) - #include - #undef __pyx_atomic_int_type - #define __pyx_atomic_int_type long - #define __pyx_nonatomic_int_type long - #pragma intrinsic (_InterlockedExchangeAdd) - #define __pyx_atomic_incr_aligned(value) _InterlockedExchangeAdd(value, 1) - #define __pyx_atomic_decr_aligned(value) _InterlockedExchangeAdd(value, -1) - #ifdef __PYX_DEBUG_ATOMICS - #pragma message ("Using MSVC atomics") - #endif -#else - #undef CYTHON_ATOMICS - #define CYTHON_ATOMICS 0 - #ifdef __PYX_DEBUG_ATOMICS - #warning "Not using atomics" - #endif -#endif -#if CYTHON_ATOMICS - #define __pyx_add_acquisition_count(memview)\ - __pyx_atomic_incr_aligned(__pyx_get_slice_count_pointer(memview)) - #define __pyx_sub_acquisition_count(memview)\ - __pyx_atomic_decr_aligned(__pyx_get_slice_count_pointer(memview)) -#else - #define __pyx_add_acquisition_count(memview)\ - __pyx_add_acquisition_count_locked(__pyx_get_slice_count_pointer(memview), memview->lock) - #define __pyx_sub_acquisition_count(memview)\ - __pyx_sub_acquisition_count_locked(__pyx_get_slice_count_pointer(memview), memview->lock) -#endif - -/* MemviewSliceStruct.proto */ -struct __pyx_memoryview_obj; -typedef struct { - struct __pyx_memoryview_obj *memview; - char *data; - Py_ssize_t shape[8]; - Py_ssize_t strides[8]; - Py_ssize_t suboffsets[8]; -} __Pyx_memviewslice; -#define __Pyx_MemoryView_Len(m) (m.shape[0]) - -/* #### Code section: numeric_typedefs ### */ -/* #### Code section: complex_type_declarations ### */ -/* #### Code section: type_declarations ### */ - -/*--- Type declarations ---*/ -struct __pyx_obj_9csimdjson_ArrayBuffer; -struct __pyx_obj_9csimdjson_Array; -struct __pyx_obj_9csimdjson_Object; -struct __pyx_obj_9csimdjson_Parser; -struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__; -struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__; -struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values; -struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items; -struct __pyx_obj_9csimdjson___pyx_scope_struct_4_get_implementations; -struct __pyx_array_obj; -struct __pyx_MemviewEnum_obj; -struct __pyx_memoryview_obj; -struct __pyx_memoryviewslice_obj; -struct __pyx_opt_args_9csimdjson_element_to_primitive; - -/* "csimdjson.pyx":69 - * - * - * cdef inline object element_to_primitive(Parser p, simd_element e, # <<<<<<<<<<<<<< - * bint recursive=False): - * cdef: - */ -struct __pyx_opt_args_9csimdjson_element_to_primitive { - int __pyx_n; - int recursive; -}; - -/* "csimdjson.pyx":104 - * - * - * cdef class ArrayBuffer: # <<<<<<<<<<<<<< - * """ - * A container for the flattened data of a homogeneous :class:`Array`. - */ -struct __pyx_obj_9csimdjson_ArrayBuffer { - PyObject_HEAD - struct __pyx_vtabstruct_9csimdjson_ArrayBuffer *__pyx_vtab; - void *buffer; - size_t size; -}; - - -/* "csimdjson.pyx":157 - * - * - * cdef class Array: # <<<<<<<<<<<<<< - * """A proxy object that behaves much like a real `list()`. - * - */ -struct __pyx_obj_9csimdjson_Array { - PyObject_HEAD - struct __pyx_vtabstruct_9csimdjson_Array *__pyx_vtab; - struct __pyx_obj_9csimdjson_Parser *parser; - simdjson::dom::array c_element; - std::shared_ptr c_parser; -}; - - -/* "csimdjson.pyx":275 - * - * - * cdef class Object: # <<<<<<<<<<<<<< - * """A proxy object that behaves much like a real `dict()`. - * - */ -struct __pyx_obj_9csimdjson_Object { - PyObject_HEAD - struct __pyx_vtabstruct_9csimdjson_Object *__pyx_vtab; - struct __pyx_obj_9csimdjson_Parser *parser; - simdjson::dom::object c_element; - std::shared_ptr c_parser; -}; - - -/* "csimdjson.pyx":392 - * - * - * cdef class Parser: # <<<<<<<<<<<<<< - * """ - * A `Parser` instance is used to load and/or parse a JSON document. - */ -struct __pyx_obj_9csimdjson_Parser { - PyObject_HEAD - std::shared_ptr c_parser; -}; - - -/* "csimdjson.pyx":218 - * return self.c_element.size() - * - * def __iter__(self): # <<<<<<<<<<<<<< - * cdef simd_array.iterator it = self.c_element.begin() - * while it != self.c_element.end(): - */ -struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__ { - PyObject_HEAD - simdjson::dom::array::iterator __pyx_v_it; - struct __pyx_obj_9csimdjson_Array *__pyx_v_self; -}; - - -/* "csimdjson.pyx":321 - * return True - * - * def __iter__(self): # <<<<<<<<<<<<<< - * """ - * Returns an iterator over all keys in this `Object`. - */ -struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__ { - PyObject_HEAD - char const *__pyx_v_data; - simdjson::dom::object::iterator __pyx_v_it; - struct __pyx_obj_9csimdjson_Object *__pyx_v_self; - size_t __pyx_v_size; -}; - - -/* "csimdjson.pyx":338 - * keys = __iter__ - * - * def values(self): # <<<<<<<<<<<<<< - * """ - * Returns an iterator over of all values in this `Object`. - */ -struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values { - PyObject_HEAD - simdjson::dom::object::iterator __pyx_v_it; - struct __pyx_obj_9csimdjson_Object *__pyx_v_self; -}; - - -/* "csimdjson.pyx":347 - * preincrement(it) - * - * def items(self): # <<<<<<<<<<<<<< - * """ - * Returns an iterator over all the (key, value) pairs in this - */ -struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items { - PyObject_HEAD - char const *__pyx_v_data; - simdjson::dom::object::iterator __pyx_v_it; - struct __pyx_obj_9csimdjson_Object *__pyx_v_self; - size_t __pyx_v_size; -}; - - -/* "csimdjson.pyx":521 - * return element_to_primitive(self, document, recursive) - * - * def get_implementations(self, supported_by_runtime=True): # <<<<<<<<<<<<<< - * """ - * A list of available parser implementations in the form of [(name, - */ -struct __pyx_obj_9csimdjson___pyx_scope_struct_4_get_implementations { - PyObject_HEAD - simdjson::implementation const *__pyx_v_impl; - struct __pyx_obj_9csimdjson_Parser *__pyx_v_self; - PyObject *__pyx_v_supported_by_runtime; - simdjson::implementation const *const *__pyx_t_0; - simdjson::internal::available_implementation_list const *__pyx_t_1; -}; - - -/* "View.MemoryView":114 - * @cython.collection_type("sequence") - * @cname("__pyx_array") - * cdef class array: # <<<<<<<<<<<<<< - * - * cdef: - */ -struct __pyx_array_obj { - PyObject_HEAD - struct __pyx_vtabstruct_array *__pyx_vtab; - char *data; - Py_ssize_t len; - char *format; - int ndim; - Py_ssize_t *_shape; - Py_ssize_t *_strides; - Py_ssize_t itemsize; - PyObject *mode; - PyObject *_format; - void (*callback_free_data)(void *); - int free_data; - int dtype_is_object; -}; - - -/* "View.MemoryView":302 - * - * @cname('__pyx_MemviewEnum') - * cdef class Enum(object): # <<<<<<<<<<<<<< - * cdef object name - * def __init__(self, name): - */ -struct __pyx_MemviewEnum_obj { - PyObject_HEAD - PyObject *name; -}; - - -/* "View.MemoryView":337 - * - * @cname('__pyx_memoryview') - * cdef class memoryview: # <<<<<<<<<<<<<< - * - * cdef object obj - */ -struct __pyx_memoryview_obj { - PyObject_HEAD - struct __pyx_vtabstruct_memoryview *__pyx_vtab; - PyObject *obj; - PyObject *_size; - PyObject *_array_interface; - PyThread_type_lock lock; - __pyx_atomic_int_type acquisition_count; - Py_buffer view; - int flags; - int dtype_is_object; - __Pyx_TypeInfo *typeinfo; -}; - - -/* "View.MemoryView":952 - * @cython.collection_type("sequence") - * @cname('__pyx_memoryviewslice') - * cdef class _memoryviewslice(memoryview): # <<<<<<<<<<<<<< - * "Internal class for passing memoryview slices to Python" - * - */ -struct __pyx_memoryviewslice_obj { - struct __pyx_memoryview_obj __pyx_base; - __Pyx_memviewslice from_slice; - PyObject *from_object; - PyObject *(*to_object_func)(char *); - int (*to_dtype_func)(char *, PyObject *); -}; - - - -/* "csimdjson.pyx":104 - * - * - * cdef class ArrayBuffer: # <<<<<<<<<<<<<< - * """ - * A container for the flattened data of a homogeneous :class:`Array`. - */ - -struct __pyx_vtabstruct_9csimdjson_ArrayBuffer { - PyObject *(*from_element)(simdjson::dom::array, PyObject *); -}; -static struct __pyx_vtabstruct_9csimdjson_ArrayBuffer *__pyx_vtabptr_9csimdjson_ArrayBuffer; -static CYTHON_INLINE PyObject *__pyx_f_9csimdjson_11ArrayBuffer_from_element(simdjson::dom::array, PyObject *); - - -/* "csimdjson.pyx":157 - * - * - * cdef class Array: # <<<<<<<<<<<<<< - * """A proxy object that behaves much like a real `list()`. - * - */ - -struct __pyx_vtabstruct_9csimdjson_Array { - PyObject *(*from_element)(struct __pyx_obj_9csimdjson_Parser *, simdjson::dom::element); -}; -static struct __pyx_vtabstruct_9csimdjson_Array *__pyx_vtabptr_9csimdjson_Array; -static CYTHON_INLINE PyObject *__pyx_f_9csimdjson_5Array_from_element(struct __pyx_obj_9csimdjson_Parser *, simdjson::dom::element); - - -/* "csimdjson.pyx":275 - * - * - * cdef class Object: # <<<<<<<<<<<<<< - * """A proxy object that behaves much like a real `dict()`. - * - */ - -struct __pyx_vtabstruct_9csimdjson_Object { - PyObject *(*from_element)(struct __pyx_obj_9csimdjson_Parser *, simdjson::dom::element); -}; -static struct __pyx_vtabstruct_9csimdjson_Object *__pyx_vtabptr_9csimdjson_Object; -static CYTHON_INLINE PyObject *__pyx_f_9csimdjson_6Object_from_element(struct __pyx_obj_9csimdjson_Parser *, simdjson::dom::element); - - -/* "View.MemoryView":114 - * @cython.collection_type("sequence") - * @cname("__pyx_array") - * cdef class array: # <<<<<<<<<<<<<< - * - * cdef: - */ - -struct __pyx_vtabstruct_array { - PyObject *(*get_memview)(struct __pyx_array_obj *); -}; -static struct __pyx_vtabstruct_array *__pyx_vtabptr_array; - - -/* "View.MemoryView":337 - * - * @cname('__pyx_memoryview') - * cdef class memoryview: # <<<<<<<<<<<<<< - * - * cdef object obj - */ - -struct __pyx_vtabstruct_memoryview { - char *(*get_item_pointer)(struct __pyx_memoryview_obj *, PyObject *); - PyObject *(*is_slice)(struct __pyx_memoryview_obj *, PyObject *); - PyObject *(*setitem_slice_assignment)(struct __pyx_memoryview_obj *, PyObject *, PyObject *); - PyObject *(*setitem_slice_assign_scalar)(struct __pyx_memoryview_obj *, struct __pyx_memoryview_obj *, PyObject *); - PyObject *(*setitem_indexed)(struct __pyx_memoryview_obj *, PyObject *, PyObject *); - PyObject *(*convert_item_to_object)(struct __pyx_memoryview_obj *, char *); - PyObject *(*assign_item_from_object)(struct __pyx_memoryview_obj *, char *, PyObject *); - PyObject *(*_get_base)(struct __pyx_memoryview_obj *); -}; -static struct __pyx_vtabstruct_memoryview *__pyx_vtabptr_memoryview; - - -/* "View.MemoryView":952 - * @cython.collection_type("sequence") - * @cname('__pyx_memoryviewslice') - * cdef class _memoryviewslice(memoryview): # <<<<<<<<<<<<<< - * "Internal class for passing memoryview slices to Python" - * - */ - -struct __pyx_vtabstruct__memoryviewslice { - struct __pyx_vtabstruct_memoryview __pyx_base; -}; -static struct __pyx_vtabstruct__memoryviewslice *__pyx_vtabptr__memoryviewslice; -/* #### Code section: utility_code_proto ### */ - -/* --- Runtime support code (head) --- */ -/* Refnanny.proto */ -#ifndef CYTHON_REFNANNY - #define CYTHON_REFNANNY 0 -#endif -#if CYTHON_REFNANNY - typedef struct { - void (*INCREF)(void*, PyObject*, Py_ssize_t); - void (*DECREF)(void*, PyObject*, Py_ssize_t); - void (*GOTREF)(void*, PyObject*, Py_ssize_t); - void (*GIVEREF)(void*, PyObject*, Py_ssize_t); - void* (*SetupContext)(const char*, Py_ssize_t, const char*); - void (*FinishContext)(void**); - } __Pyx_RefNannyAPIStruct; - static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; - static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); - #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; -#ifdef WITH_THREAD - #define __Pyx_RefNannySetupContext(name, acquire_gil)\ - if (acquire_gil) {\ - PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ - __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ - PyGILState_Release(__pyx_gilstate_save);\ - } else {\ - __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ - } - #define __Pyx_RefNannyFinishContextNogil() {\ - PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ - __Pyx_RefNannyFinishContext();\ - PyGILState_Release(__pyx_gilstate_save);\ - } -#else - #define __Pyx_RefNannySetupContext(name, acquire_gil)\ - __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__)) - #define __Pyx_RefNannyFinishContextNogil() __Pyx_RefNannyFinishContext() -#endif - #define __Pyx_RefNannyFinishContextNogil() {\ - PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ - __Pyx_RefNannyFinishContext();\ - PyGILState_Release(__pyx_gilstate_save);\ - } - #define __Pyx_RefNannyFinishContext()\ - __Pyx_RefNanny->FinishContext(&__pyx_refnanny) - #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) - #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) - #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) - #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) - #define __Pyx_XINCREF(r) do { if((r) == NULL); else {__Pyx_INCREF(r); }} while(0) - #define __Pyx_XDECREF(r) do { if((r) == NULL); else {__Pyx_DECREF(r); }} while(0) - #define __Pyx_XGOTREF(r) do { if((r) == NULL); else {__Pyx_GOTREF(r); }} while(0) - #define __Pyx_XGIVEREF(r) do { if((r) == NULL); else {__Pyx_GIVEREF(r);}} while(0) -#else - #define __Pyx_RefNannyDeclarations - #define __Pyx_RefNannySetupContext(name, acquire_gil) - #define __Pyx_RefNannyFinishContextNogil() - #define __Pyx_RefNannyFinishContext() - #define __Pyx_INCREF(r) Py_INCREF(r) - #define __Pyx_DECREF(r) Py_DECREF(r) - #define __Pyx_GOTREF(r) - #define __Pyx_GIVEREF(r) - #define __Pyx_XINCREF(r) Py_XINCREF(r) - #define __Pyx_XDECREF(r) Py_XDECREF(r) - #define __Pyx_XGOTREF(r) - #define __Pyx_XGIVEREF(r) -#endif -#define __Pyx_Py_XDECREF_SET(r, v) do {\ - PyObject *tmp = (PyObject *) r;\ - r = v; Py_XDECREF(tmp);\ - } while (0) -#define __Pyx_XDECREF_SET(r, v) do {\ - PyObject *tmp = (PyObject *) r;\ - r = v; __Pyx_XDECREF(tmp);\ - } while (0) -#define __Pyx_DECREF_SET(r, v) do {\ - PyObject *tmp = (PyObject *) r;\ - r = v; __Pyx_DECREF(tmp);\ - } while (0) -#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) -#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) - -/* PyErrExceptionMatches.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_PyErr_ExceptionMatches(err) __Pyx_PyErr_ExceptionMatchesInState(__pyx_tstate, err) -static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err); -#else -#define __Pyx_PyErr_ExceptionMatches(err) PyErr_ExceptionMatches(err) -#endif - -/* PyThreadStateGet.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; -#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current; -#if PY_VERSION_HEX >= 0x030C00A6 -#define __Pyx_PyErr_Occurred() (__pyx_tstate->current_exception != NULL) -#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->current_exception ? (PyObject*) Py_TYPE(__pyx_tstate->current_exception) : (PyObject*) NULL) -#else -#define __Pyx_PyErr_Occurred() (__pyx_tstate->curexc_type != NULL) -#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->curexc_type) -#endif -#else -#define __Pyx_PyThreadState_declare -#define __Pyx_PyThreadState_assign -#define __Pyx_PyErr_Occurred() (PyErr_Occurred() != NULL) -#define __Pyx_PyErr_CurrentExceptionType() PyErr_Occurred() -#endif - -/* PyErrFetchRestore.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL) -#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb) -#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb) -#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb) -#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb) -static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); -static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A6 -#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL)) -#else -#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) -#endif -#else -#define __Pyx_PyErr_Clear() PyErr_Clear() -#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) -#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb) -#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb) -#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb) -#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb) -#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb) -#define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb) -#endif - -/* PyObjectGetAttrStr.proto */ -#if CYTHON_USE_TYPE_SLOTS -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name); -#else -#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n) -#endif - -/* PyObjectGetAttrStrNoError.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name); - -/* GetBuiltinName.proto */ -static PyObject *__Pyx_GetBuiltinName(PyObject *name); - -/* TupleAndListFromArray.proto */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n); -static CYTHON_INLINE PyObject* __Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n); -#endif - -/* IncludeStringH.proto */ -#include - -/* BytesEquals.proto */ -static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); - -/* UnicodeEquals.proto */ -static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals); - -/* fastcall.proto */ -#if CYTHON_AVOID_BORROWED_REFS - #define __Pyx_Arg_VARARGS(args, i) PySequence_GetItem(args, i) -#elif CYTHON_ASSUME_SAFE_MACROS - #define __Pyx_Arg_VARARGS(args, i) PyTuple_GET_ITEM(args, i) -#else - #define __Pyx_Arg_VARARGS(args, i) PyTuple_GetItem(args, i) -#endif -#if CYTHON_AVOID_BORROWED_REFS - #define __Pyx_Arg_NewRef_VARARGS(arg) __Pyx_NewRef(arg) - #define __Pyx_Arg_XDECREF_VARARGS(arg) Py_XDECREF(arg) -#else - #define __Pyx_Arg_NewRef_VARARGS(arg) arg // no-op - #define __Pyx_Arg_XDECREF_VARARGS(arg) // no-op - arg is borrowed -#endif -#define __Pyx_NumKwargs_VARARGS(kwds) PyDict_Size(kwds) -#define __Pyx_KwValues_VARARGS(args, nargs) NULL -#define __Pyx_GetKwValue_VARARGS(kw, kwvalues, s) __Pyx_PyDict_GetItemStrWithError(kw, s) -#define __Pyx_KwargsAsDict_VARARGS(kw, kwvalues) PyDict_Copy(kw) -#if CYTHON_METH_FASTCALL - #define __Pyx_Arg_FASTCALL(args, i) args[i] - #define __Pyx_NumKwargs_FASTCALL(kwds) PyTuple_GET_SIZE(kwds) - #define __Pyx_KwValues_FASTCALL(args, nargs) ((args) + (nargs)) - static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s); - #define __Pyx_KwargsAsDict_FASTCALL(kw, kwvalues) _PyStack_AsDict(kwvalues, kw) - #define __Pyx_Arg_NewRef_FASTCALL(arg) arg // no-op, __Pyx_Arg_FASTCALL is direct and this needs - #define __Pyx_Arg_XDECREF_FASTCALL(arg) // no-op - arg was returned from array -#else - #define __Pyx_Arg_FASTCALL __Pyx_Arg_VARARGS - #define __Pyx_NumKwargs_FASTCALL __Pyx_NumKwargs_VARARGS - #define __Pyx_KwValues_FASTCALL __Pyx_KwValues_VARARGS - #define __Pyx_GetKwValue_FASTCALL __Pyx_GetKwValue_VARARGS - #define __Pyx_KwargsAsDict_FASTCALL __Pyx_KwargsAsDict_VARARGS - #define __Pyx_Arg_NewRef_FASTCALL(arg) __Pyx_Arg_NewRef_VARARGS(arg) - #define __Pyx_Arg_XDECREF_FASTCALL(arg) __Pyx_Arg_XDECREF_VARARGS(arg) -#endif -#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS -#define __Pyx_ArgsSlice_VARARGS(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_VARARGS(args, start), stop - start) -#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_FASTCALL(args, start), stop - start) -#else -#define __Pyx_ArgsSlice_VARARGS(args, start, stop) PyTuple_GetSlice(args, start, stop) -#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) PyTuple_GetSlice(args, start, stop) -#endif - -/* RaiseArgTupleInvalid.proto */ -static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, - Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); - -/* RaiseDoubleKeywords.proto */ -static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); - -/* ParseKeywords.proto */ -static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject *const *kwvalues, - PyObject **argnames[], - PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, - const char* function_name); - -/* ArgTypeTest.proto */ -#define __Pyx_ArgTypeTest(obj, type, none_allowed, name, exact)\ - ((likely(__Pyx_IS_TYPE(obj, type) | (none_allowed && (obj == Py_None)))) ? 1 :\ - __Pyx__ArgTypeTest(obj, type, name, exact)) -static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact); - -/* RaiseException.proto */ -static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); - -/* PyFunctionFastCall.proto */ -#if CYTHON_FAST_PYCALL -#if !CYTHON_VECTORCALL -#define __Pyx_PyFunction_FastCall(func, args, nargs)\ - __Pyx_PyFunction_FastCallDict((func), (args), (nargs), NULL) -static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs); -#endif -#define __Pyx_BUILD_ASSERT_EXPR(cond)\ - (sizeof(char [1 - 2*!(cond)]) - 1) -#ifndef Py_MEMBER_SIZE -#define Py_MEMBER_SIZE(type, member) sizeof(((type *)0)->member) -#endif -#if !CYTHON_VECTORCALL -#if PY_VERSION_HEX >= 0x03080000 - #include "frameobject.h" -#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API - #ifndef Py_BUILD_CORE - #define Py_BUILD_CORE 1 - #endif - #include "internal/pycore_frame.h" -#endif - #define __Pxy_PyFrame_Initialize_Offsets() - #define __Pyx_PyFrame_GetLocalsplus(frame) ((frame)->f_localsplus) -#else - static size_t __pyx_pyframe_localsplus_offset = 0; - #include "frameobject.h" - #define __Pxy_PyFrame_Initialize_Offsets()\ - ((void)__Pyx_BUILD_ASSERT_EXPR(sizeof(PyFrameObject) == offsetof(PyFrameObject, f_localsplus) + Py_MEMBER_SIZE(PyFrameObject, f_localsplus)),\ - (void)(__pyx_pyframe_localsplus_offset = ((size_t)PyFrame_Type.tp_basicsize) - Py_MEMBER_SIZE(PyFrameObject, f_localsplus))) - #define __Pyx_PyFrame_GetLocalsplus(frame)\ - (assert(__pyx_pyframe_localsplus_offset), (PyObject **)(((char *)(frame)) + __pyx_pyframe_localsplus_offset)) -#endif -#endif -#endif - -/* PyObjectCall.proto */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); -#else -#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) -#endif - -/* PyObjectCallMethO.proto */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); -#endif - -/* PyObjectFastCall.proto */ -#define __Pyx_PyObject_FastCall(func, args, nargs) __Pyx_PyObject_FastCallDict(func, args, (size_t)(nargs), NULL) -static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, PyObject **args, size_t nargs, PyObject *kwargs); - -/* RaiseUnexpectedTypeError.proto */ -static int __Pyx_RaiseUnexpectedTypeError(const char *expected, PyObject *obj); - -/* GCCDiagnostics.proto */ -#if !defined(__INTEL_COMPILER) && defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) -#define __Pyx_HAS_GCC_DIAGNOSTIC -#endif - -/* BuildPyUnicode.proto */ -static PyObject* __Pyx_PyUnicode_BuildFromAscii(Py_ssize_t ulength, char* chars, int clength, - int prepend_sign, char padding_char); - -/* CIntToPyUnicode.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyUnicode_From_int(int value, Py_ssize_t width, char padding_char, char format_char); - -/* CIntToPyUnicode.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyUnicode_From_Py_ssize_t(Py_ssize_t value, Py_ssize_t width, char padding_char, char format_char); - -/* JoinPyUnicode.proto */ -static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_count, Py_ssize_t result_ulength, - Py_UCS4 max_char); - -/* StrEquals.proto */ -#if PY_MAJOR_VERSION >= 3 -#define __Pyx_PyString_Equals __Pyx_PyUnicode_Equals -#else -#define __Pyx_PyString_Equals __Pyx_PyBytes_Equals -#endif - -/* PyObjectFormatSimple.proto */ -#if CYTHON_COMPILING_IN_PYPY - #define __Pyx_PyObject_FormatSimple(s, f) (\ - likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) :\ - PyObject_Format(s, f)) -#elif PY_MAJOR_VERSION < 3 - #define __Pyx_PyObject_FormatSimple(s, f) (\ - likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) :\ - likely(PyString_CheckExact(s)) ? PyUnicode_FromEncodedObject(s, NULL, "strict") :\ - PyObject_Format(s, f)) -#elif CYTHON_USE_TYPE_SLOTS - #define __Pyx_PyObject_FormatSimple(s, f) (\ - likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) :\ - likely(PyLong_CheckExact(s)) ? PyLong_Type.tp_repr(s) :\ - likely(PyFloat_CheckExact(s)) ? PyFloat_Type.tp_repr(s) :\ - PyObject_Format(s, f)) -#else - #define __Pyx_PyObject_FormatSimple(s, f) (\ - likely(PyUnicode_CheckExact(s)) ? (Py_INCREF(s), s) :\ - PyObject_Format(s, f)) -#endif - -CYTHON_UNUSED static int __pyx_array_getbuffer(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/ -static PyObject *__pyx_array_get_memview(struct __pyx_array_obj *); /*proto*/ -/* GetAttr.proto */ -static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *, PyObject *); - -/* GetItemInt.proto */ -#define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ - (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ - __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) :\ - (is_list ? (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) :\ - __Pyx_GetItemInt_Generic(o, to_py_func(i)))) -#define __Pyx_GetItemInt_List(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ - (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ - __Pyx_GetItemInt_List_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ - (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL)) -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, - int wraparound, int boundscheck); -#define __Pyx_GetItemInt_Tuple(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ - (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ - __Pyx_GetItemInt_Tuple_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ - (PyErr_SetString(PyExc_IndexError, "tuple index out of range"), (PyObject*)NULL)) -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, - int wraparound, int boundscheck); -static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j); -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, - int is_list, int wraparound, int boundscheck); - -/* PyObjectCallOneArg.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); - -/* ObjectGetItem.proto */ -#if CYTHON_USE_TYPE_SLOTS -static CYTHON_INLINE PyObject *__Pyx_PyObject_GetItem(PyObject *obj, PyObject *key); -#else -#define __Pyx_PyObject_GetItem(obj, key) PyObject_GetItem(obj, key) -#endif - -/* KeywordStringCheck.proto */ -static int __Pyx_CheckKeywordStrings(PyObject *kw, const char* function_name, int kw_allowed); - -/* DivInt[Py_ssize_t].proto */ -static CYTHON_INLINE Py_ssize_t __Pyx_div_Py_ssize_t(Py_ssize_t, Py_ssize_t); - -/* UnaryNegOverflows.proto */ -#define __Pyx_UNARY_NEG_WOULD_OVERFLOW(x)\ - (((x) < 0) & ((unsigned long)(x) == 0-(unsigned long)(x))) - -/* GetAttr3.proto */ -static CYTHON_INLINE PyObject *__Pyx_GetAttr3(PyObject *, PyObject *, PyObject *); - -/* PyDictVersioning.proto */ -#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS -#define __PYX_DICT_VERSION_INIT ((PY_UINT64_T) -1) -#define __PYX_GET_DICT_VERSION(dict) (((PyDictObject*)(dict))->ma_version_tag) -#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)\ - (version_var) = __PYX_GET_DICT_VERSION(dict);\ - (cache_var) = (value); -#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) {\ - static PY_UINT64_T __pyx_dict_version = 0;\ - static PyObject *__pyx_dict_cached_value = NULL;\ - if (likely(__PYX_GET_DICT_VERSION(DICT) == __pyx_dict_version)) {\ - (VAR) = __pyx_dict_cached_value;\ - } else {\ - (VAR) = __pyx_dict_cached_value = (LOOKUP);\ - __pyx_dict_version = __PYX_GET_DICT_VERSION(DICT);\ - }\ -} -static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj); -static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj); -static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version); -#else -#define __PYX_GET_DICT_VERSION(dict) (0) -#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var) -#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) (VAR) = (LOOKUP); -#endif - -/* GetModuleGlobalName.proto */ -#if CYTHON_USE_DICT_VERSIONS -#define __Pyx_GetModuleGlobalName(var, name) do {\ - static PY_UINT64_T __pyx_dict_version = 0;\ - static PyObject *__pyx_dict_cached_value = NULL;\ - (var) = (likely(__pyx_dict_version == __PYX_GET_DICT_VERSION(__pyx_d))) ?\ - (likely(__pyx_dict_cached_value) ? __Pyx_NewRef(__pyx_dict_cached_value) : __Pyx_GetBuiltinName(name)) :\ - __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ -} while(0) -#define __Pyx_GetModuleGlobalNameUncached(var, name) do {\ - PY_UINT64_T __pyx_dict_version;\ - PyObject *__pyx_dict_cached_value;\ - (var) = __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ -} while(0) -static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value); -#else -#define __Pyx_GetModuleGlobalName(var, name) (var) = __Pyx__GetModuleGlobalName(name) -#define __Pyx_GetModuleGlobalNameUncached(var, name) (var) = __Pyx__GetModuleGlobalName(name) -static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name); -#endif - -/* AssertionsEnabled.proto */ -#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag) - #define __Pyx_init_assertions_enabled() (0) - #define __pyx_assertions_enabled() (1) -#elif CYTHON_COMPILING_IN_LIMITED_API || (CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030C0000) - static int __pyx_assertions_enabled_flag; - #define __pyx_assertions_enabled() (__pyx_assertions_enabled_flag) - static int __Pyx_init_assertions_enabled(void) { - PyObject *builtins, *debug, *debug_str; - int flag; - builtins = PyEval_GetBuiltins(); - if (!builtins) goto bad; - debug_str = PyUnicode_FromStringAndSize("__debug__", 9); - if (!debug_str) goto bad; - debug = PyObject_GetItem(builtins, debug_str); - Py_DECREF(debug_str); - if (!debug) goto bad; - flag = PyObject_IsTrue(debug); - Py_DECREF(debug); - if (flag == -1) goto bad; - __pyx_assertions_enabled_flag = flag; - return 0; - bad: - __pyx_assertions_enabled_flag = 1; - return -1; - } -#else - #define __Pyx_init_assertions_enabled() (0) - #define __pyx_assertions_enabled() (!Py_OptimizeFlag) -#endif - -/* RaiseTooManyValuesToUnpack.proto */ -static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected); - -/* RaiseNeedMoreValuesToUnpack.proto */ -static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index); - -/* RaiseNoneIterError.proto */ -static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void); - -/* ExtTypeTest.proto */ -static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); - -/* GetTopmostException.proto */ -#if CYTHON_USE_EXC_INFO_STACK && CYTHON_FAST_THREAD_STATE -static _PyErr_StackItem * __Pyx_PyErr_GetTopmostException(PyThreadState *tstate); -#endif - -/* SaveResetException.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_ExceptionSave(type, value, tb) __Pyx__ExceptionSave(__pyx_tstate, type, value, tb) -static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); -#define __Pyx_ExceptionReset(type, value, tb) __Pyx__ExceptionReset(__pyx_tstate, type, value, tb) -static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); -#else -#define __Pyx_ExceptionSave(type, value, tb) PyErr_GetExcInfo(type, value, tb) -#define __Pyx_ExceptionReset(type, value, tb) PyErr_SetExcInfo(type, value, tb) -#endif - -/* GetException.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_GetException(type, value, tb) __Pyx__GetException(__pyx_tstate, type, value, tb) -static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); -#else -static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb); -#endif - -/* SwapException.proto */ -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_ExceptionSwap(type, value, tb) __Pyx__ExceptionSwap(__pyx_tstate, type, value, tb) -static CYTHON_INLINE void __Pyx__ExceptionSwap(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); -#else -static CYTHON_INLINE void __Pyx_ExceptionSwap(PyObject **type, PyObject **value, PyObject **tb); -#endif - -/* Import.proto */ -static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); - -/* ImportDottedModule.proto */ -static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple); -#if PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple); -#endif - -/* ssize_strlen.proto */ -static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s); - -/* FastTypeChecks.proto */ -#if CYTHON_COMPILING_IN_CPYTHON -#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type) -#define __Pyx_TypeCheck2(obj, type1, type2) __Pyx_IsAnySubtype2(Py_TYPE(obj), (PyTypeObject *)type1, (PyTypeObject *)type2) -static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b); -static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b); -static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type); -static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2); -#else -#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) -#define __Pyx_TypeCheck2(obj, type1, type2) (PyObject_TypeCheck(obj, (PyTypeObject *)type1) || PyObject_TypeCheck(obj, (PyTypeObject *)type2)) -#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type) -#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2)) -#endif -#define __Pyx_PyErr_ExceptionMatches2(err1, err2) __Pyx_PyErr_GivenExceptionMatches2(__Pyx_PyErr_CurrentExceptionType(), err1, err2) -#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception) - -CYTHON_UNUSED static int __pyx_memoryview_getbuffer(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/ -/* ListCompAppend.proto */ -#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS -static CYTHON_INLINE int __Pyx_ListComp_Append(PyObject* list, PyObject* x) { - PyListObject* L = (PyListObject*) list; - Py_ssize_t len = Py_SIZE(list); - if (likely(L->allocated > len)) { - Py_INCREF(x); - PyList_SET_ITEM(list, len, x); - __Pyx_SET_SIZE(list, len + 1); - return 0; - } - return PyList_Append(list, x); -} -#else -#define __Pyx_ListComp_Append(L,x) PyList_Append(L,x) -#endif - -/* PySequenceMultiply.proto */ -#define __Pyx_PySequence_Multiply_Left(mul, seq) __Pyx_PySequence_Multiply(seq, mul) -static CYTHON_INLINE PyObject* __Pyx_PySequence_Multiply(PyObject *seq, Py_ssize_t mul); - -/* SetItemInt.proto */ -#define __Pyx_SetItemInt(o, i, v, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ - (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ - __Pyx_SetItemInt_Fast(o, (Py_ssize_t)i, v, is_list, wraparound, boundscheck) :\ - (is_list ? (PyErr_SetString(PyExc_IndexError, "list assignment index out of range"), -1) :\ - __Pyx_SetItemInt_Generic(o, to_py_func(i), v))) -static int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v); -static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v, - int is_list, int wraparound, int boundscheck); - -/* RaiseUnboundLocalError.proto */ -static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varname); - -/* DivInt[long].proto */ -static CYTHON_INLINE long __Pyx_div_long(long, long); - -/* PySequenceContains.proto */ -static CYTHON_INLINE int __Pyx_PySequence_ContainsTF(PyObject* item, PyObject* seq, int eq) { - int result = PySequence_Contains(seq, item); - return unlikely(result < 0) ? result : (result == (eq == Py_EQ)); -} - -/* ImportFrom.proto */ -static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name); - -/* HasAttr.proto */ -static CYTHON_INLINE int __Pyx_HasAttr(PyObject *, PyObject *); - -/* MoveIfSupported.proto */ -#if CYTHON_USE_CPP_STD_MOVE - #include - #define __PYX_STD_MOVE_IF_SUPPORTED(x) std::move(x) -#else - #define __PYX_STD_MOVE_IF_SUPPORTED(x) x -#endif - -/* pep479.proto */ -static void __Pyx_Generator_Replace_StopIteration(int in_async_gen); - -/* RaiseKeywordRequired.proto */ -static void __Pyx_RaiseKeywordRequired(const char* func_name, PyObject* kw_name); - -/* PyIntCompare.proto */ -static CYTHON_INLINE int __Pyx_PyInt_BoolEqObjC(PyObject *op1, PyObject *op2, long intval, long inplace); - -/* BufferIndexError.proto */ -static void __Pyx_RaiseBufferIndexError(int axis); - -/* PyObject_Str.proto */ -#define __Pyx_PyObject_Str(obj)\ - (likely(PyString_CheckExact(obj)) ? __Pyx_NewRef(obj) : PyObject_Str(obj)) - -/* PyObject_GenericGetAttrNoDict.proto */ -#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 -static CYTHON_INLINE PyObject* __Pyx_PyObject_GenericGetAttrNoDict(PyObject* obj, PyObject* attr_name); -#else -#define __Pyx_PyObject_GenericGetAttrNoDict PyObject_GenericGetAttr -#endif - -/* PyObject_GenericGetAttr.proto */ -#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 -static PyObject* __Pyx_PyObject_GenericGetAttr(PyObject* obj, PyObject* attr_name); -#else -#define __Pyx_PyObject_GenericGetAttr PyObject_GenericGetAttr -#endif - -/* IncludeStructmemberH.proto */ -#include - -/* FixUpExtensionType.proto */ -#if CYTHON_USE_TYPE_SPECS -static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type); -#endif - -/* PyObjectCallNoArg.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func); - -/* PyObjectGetMethod.proto */ -static int __Pyx_PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method); - -/* PyObjectCallMethod0.proto */ -static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name); - -/* ValidateBasesTuple.proto */ -#if CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API || CYTHON_USE_TYPE_SPECS -static int __Pyx_validate_bases_tuple(const char *type_name, Py_ssize_t dictoffset, PyObject *bases); -#endif - -/* PyType_Ready.proto */ -CYTHON_UNUSED static int __Pyx_PyType_Ready(PyTypeObject *t); - -/* SetVTable.proto */ -static int __Pyx_SetVtable(PyTypeObject* typeptr , void* vtable); - -/* GetVTable.proto */ -static void* __Pyx_GetVtable(PyTypeObject *type); - -/* MergeVTables.proto */ -#if !CYTHON_COMPILING_IN_LIMITED_API -static int __Pyx_MergeVtables(PyTypeObject *type); -#endif - -/* SetupReduce.proto */ -#if !CYTHON_COMPILING_IN_LIMITED_API -static int __Pyx_setup_reduce(PyObject* type_obj); -#endif - -/* TypeImport.proto */ -#ifndef __PYX_HAVE_RT_ImportType_proto_3_0_2 -#define __PYX_HAVE_RT_ImportType_proto_3_0_2 -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L -#include -#endif -#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || __cplusplus >= 201103L -#define __PYX_GET_STRUCT_ALIGNMENT_3_0_2(s) alignof(s) -#else -#define __PYX_GET_STRUCT_ALIGNMENT_3_0_2(s) sizeof(void*) -#endif -enum __Pyx_ImportType_CheckSize_3_0_2 { - __Pyx_ImportType_CheckSize_Error_3_0_2 = 0, - __Pyx_ImportType_CheckSize_Warn_3_0_2 = 1, - __Pyx_ImportType_CheckSize_Ignore_3_0_2 = 2 -}; -static PyTypeObject *__Pyx_ImportType_3_0_2(PyObject* module, const char *module_name, const char *class_name, size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_0_2 check_size); -#endif - -/* CIntToPyUnicode.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyUnicode_From___pyx_anon_enum(int value, Py_ssize_t width, char padding_char, char format_char); - -/* FetchSharedCythonModule.proto */ -static PyObject *__Pyx_FetchSharedCythonABIModule(void); - -/* FetchCommonType.proto */ -#if !CYTHON_USE_TYPE_SPECS -static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type); -#else -static PyTypeObject* __Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases); -#endif - -/* PyMethodNew.proto */ -#if CYTHON_COMPILING_IN_LIMITED_API -static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { - PyObject *typesModule=NULL, *methodType=NULL, *result=NULL; - CYTHON_UNUSED_VAR(typ); - if (!self) - return __Pyx_NewRef(func); - typesModule = PyImport_ImportModule("types"); - if (!typesModule) return NULL; - methodType = PyObject_GetAttrString(typesModule, "MethodType"); - Py_DECREF(typesModule); - if (!methodType) return NULL; - result = PyObject_CallFunctionObjArgs(methodType, func, self, NULL); - Py_DECREF(methodType); - return result; -} -#elif PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { - CYTHON_UNUSED_VAR(typ); - if (!self) - return __Pyx_NewRef(func); - return PyMethod_New(func, self); -} -#else - #define __Pyx_PyMethod_New PyMethod_New -#endif - -/* PyVectorcallFastCallDict.proto */ -#if CYTHON_METH_FASTCALL -static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw); -#endif - -/* CythonFunctionShared.proto */ -#define __Pyx_CyFunction_USED -#define __Pyx_CYFUNCTION_STATICMETHOD 0x01 -#define __Pyx_CYFUNCTION_CLASSMETHOD 0x02 -#define __Pyx_CYFUNCTION_CCLASS 0x04 -#define __Pyx_CYFUNCTION_COROUTINE 0x08 -#define __Pyx_CyFunction_GetClosure(f)\ - (((__pyx_CyFunctionObject *) (f))->func_closure) -#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - #define __Pyx_CyFunction_GetClassObj(f)\ - (((__pyx_CyFunctionObject *) (f))->func_classobj) -#else - #define __Pyx_CyFunction_GetClassObj(f)\ - ((PyObject*) ((PyCMethodObject *) (f))->mm_class) -#endif -#define __Pyx_CyFunction_SetClassObj(f, classobj)\ - __Pyx__CyFunction_SetClassObj((__pyx_CyFunctionObject *) (f), (classobj)) -#define __Pyx_CyFunction_Defaults(type, f)\ - ((type *)(((__pyx_CyFunctionObject *) (f))->defaults)) -#define __Pyx_CyFunction_SetDefaultsGetter(f, g)\ - ((__pyx_CyFunctionObject *) (f))->defaults_getter = (g) -typedef struct { -#if CYTHON_COMPILING_IN_LIMITED_API - PyObject_HEAD - PyObject *func; -#elif PY_VERSION_HEX < 0x030900B1 - PyCFunctionObject func; -#else - PyCMethodObject func; -#endif -#if CYTHON_BACKPORT_VECTORCALL - __pyx_vectorcallfunc func_vectorcall; -#endif -#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API - PyObject *func_weakreflist; -#endif - PyObject *func_dict; - PyObject *func_name; - PyObject *func_qualname; - PyObject *func_doc; - PyObject *func_globals; - PyObject *func_code; - PyObject *func_closure; -#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - PyObject *func_classobj; -#endif - void *defaults; - int defaults_pyobjects; - size_t defaults_size; // used by FusedFunction for copying defaults - int flags; - PyObject *defaults_tuple; - PyObject *defaults_kwdict; - PyObject *(*defaults_getter)(PyObject *); - PyObject *func_annotations; - PyObject *func_is_coroutine; -} __pyx_CyFunctionObject; -#define __Pyx_CyFunction_Check(obj) __Pyx_TypeCheck(obj, __pyx_CyFunctionType) -#define __Pyx_IsCyOrPyCFunction(obj) __Pyx_TypeCheck2(obj, __pyx_CyFunctionType, &PyCFunction_Type) -#define __Pyx_CyFunction_CheckExact(obj) __Pyx_IS_TYPE(obj, __pyx_CyFunctionType) -static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject* op, PyMethodDef *ml, - int flags, PyObject* qualname, - PyObject *closure, - PyObject *module, PyObject *globals, - PyObject* code); -static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj); -static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *m, - size_t size, - int pyobjects); -static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *m, - PyObject *tuple); -static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *m, - PyObject *dict); -static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *m, - PyObject *dict); -static int __pyx_CyFunction_init(PyObject *module); -#if CYTHON_METH_FASTCALL -static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); -static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); -static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); -static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); -#if CYTHON_BACKPORT_VECTORCALL -#define __Pyx_CyFunction_func_vectorcall(f) (((__pyx_CyFunctionObject*)f)->func_vectorcall) -#else -#define __Pyx_CyFunction_func_vectorcall(f) (((PyCFunctionObject*)f)->vectorcall) -#endif -#endif - -/* CythonFunction.proto */ -static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, - int flags, PyObject* qualname, - PyObject *closure, - PyObject *module, PyObject *globals, - PyObject* code); - -/* GetNameInClass.proto */ -#define __Pyx_GetNameInClass(var, nmspace, name) (var) = __Pyx__GetNameInClass(nmspace, name) -static PyObject *__Pyx__GetNameInClass(PyObject *nmspace, PyObject *name); - -/* CLineInTraceback.proto */ -#ifdef CYTHON_CLINE_IN_TRACEBACK -#define __Pyx_CLineForTraceback(tstate, c_line) (((CYTHON_CLINE_IN_TRACEBACK)) ? c_line : 0) -#else -static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line); -#endif - -/* CodeObjectCache.proto */ -#if !CYTHON_COMPILING_IN_LIMITED_API -typedef struct { - PyCodeObject* code_object; - int code_line; -} __Pyx_CodeObjectCacheEntry; -struct __Pyx_CodeObjectCache { - int count; - int max_count; - __Pyx_CodeObjectCacheEntry* entries; -}; -static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; -static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); -static PyCodeObject *__pyx_find_code_object(int code_line); -static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); -#endif - -/* AddTraceback.proto */ -static void __Pyx_AddTraceback(const char *funcname, int c_line, - int py_line, const char *filename); - -/* None.proto */ -#include - -#if PY_MAJOR_VERSION < 3 - static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags); - static void __Pyx_ReleaseBuffer(Py_buffer *view); -#else - #define __Pyx_GetBuffer PyObject_GetBuffer - #define __Pyx_ReleaseBuffer PyBuffer_Release -#endif - - -/* BufferStructDeclare.proto */ -typedef struct { - Py_ssize_t shape, strides, suboffsets; -} __Pyx_Buf_DimInfo; -typedef struct { - size_t refcount; - Py_buffer pybuffer; -} __Pyx_Buffer; -typedef struct { - __Pyx_Buffer *rcbuffer; - char *data; - __Pyx_Buf_DimInfo diminfo[8]; -} __Pyx_LocalBuf_ND; - -/* MemviewSliceIsContig.proto */ -static int __pyx_memviewslice_is_contig(const __Pyx_memviewslice mvs, char order, int ndim); - -/* OverlappingSlices.proto */ -static int __pyx_slices_overlap(__Pyx_memviewslice *slice1, - __Pyx_memviewslice *slice2, - int ndim, size_t itemsize); - -/* CppExceptionConversion.proto */ -#ifndef __Pyx_CppExn2PyErr -#include -#include -#include -#include -static void __Pyx_CppExn2PyErr() { - try { - if (PyErr_Occurred()) - ; // let the latest Python exn pass through and ignore the current one - else - throw; - } catch (const std::bad_alloc& exn) { - PyErr_SetString(PyExc_MemoryError, exn.what()); - } catch (const std::bad_cast& exn) { - PyErr_SetString(PyExc_TypeError, exn.what()); - } catch (const std::bad_typeid& exn) { - PyErr_SetString(PyExc_TypeError, exn.what()); - } catch (const std::domain_error& exn) { - PyErr_SetString(PyExc_ValueError, exn.what()); - } catch (const std::invalid_argument& exn) { - PyErr_SetString(PyExc_ValueError, exn.what()); - } catch (const std::ios_base::failure& exn) { - PyErr_SetString(PyExc_IOError, exn.what()); - } catch (const std::out_of_range& exn) { - PyErr_SetString(PyExc_IndexError, exn.what()); - } catch (const std::overflow_error& exn) { - PyErr_SetString(PyExc_OverflowError, exn.what()); - } catch (const std::range_error& exn) { - PyErr_SetString(PyExc_ArithmeticError, exn.what()); - } catch (const std::underflow_error& exn) { - PyErr_SetString(PyExc_ArithmeticError, exn.what()); - } catch (const std::exception& exn) { - PyErr_SetString(PyExc_RuntimeError, exn.what()); - } - catch (...) - { - PyErr_SetString(PyExc_RuntimeError, "Unknown exception"); - } -} -#endif - -/* IsLittleEndian.proto */ -static CYTHON_INLINE int __Pyx_Is_Little_Endian(void); - -/* BufferFormatCheck.proto */ -static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts); -static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx, - __Pyx_BufFmt_StackElem* stack, - __Pyx_TypeInfo* type); - -/* TypeInfoCompare.proto */ -static int __pyx_typeinfo_cmp(__Pyx_TypeInfo *a, __Pyx_TypeInfo *b); - -/* MemviewSliceValidateAndInit.proto */ -static int __Pyx_ValidateAndInit_memviewslice( - int *axes_specs, - int c_or_f_flag, - int buf_flags, - int ndim, - __Pyx_TypeInfo *dtype, - __Pyx_BufFmt_StackElem stack[], - __Pyx_memviewslice *memviewslice, - PyObject *original_obj); - -/* ObjectToMemviewSlice.proto */ -static CYTHON_INLINE __Pyx_memviewslice __Pyx_PyObject_to_MemoryviewSlice_dc_unsigned_char__const__(PyObject *, int writable_flag); - -/* MemviewDtypeToObject.proto */ -static CYTHON_INLINE PyObject *__pyx_memview_get_unsigned_char__const__(const char *itemp); - -/* MemviewSliceCopyTemplate.proto */ -static __Pyx_memviewslice -__pyx_memoryview_copy_new_contig(const __Pyx_memviewslice *from_mvs, - const char *mode, int ndim, - size_t sizeof_dtype, int contig_flag, - int dtype_is_object); - -/* MemviewSliceInit.proto */ -#define __Pyx_BUF_MAX_NDIMS %(BUF_MAX_NDIMS)d -#define __Pyx_MEMVIEW_DIRECT 1 -#define __Pyx_MEMVIEW_PTR 2 -#define __Pyx_MEMVIEW_FULL 4 -#define __Pyx_MEMVIEW_CONTIG 8 -#define __Pyx_MEMVIEW_STRIDED 16 -#define __Pyx_MEMVIEW_FOLLOW 32 -#define __Pyx_IS_C_CONTIG 1 -#define __Pyx_IS_F_CONTIG 2 -static int __Pyx_init_memviewslice( - struct __pyx_memoryview_obj *memview, - int ndim, - __Pyx_memviewslice *memviewslice, - int memview_is_new_reference); -static CYTHON_INLINE int __pyx_add_acquisition_count_locked( - __pyx_atomic_int_type *acquisition_count, PyThread_type_lock lock); -static CYTHON_INLINE int __pyx_sub_acquisition_count_locked( - __pyx_atomic_int_type *acquisition_count, PyThread_type_lock lock); -#define __pyx_get_slice_count_pointer(memview) (&memview->acquisition_count) -#define __PYX_INC_MEMVIEW(slice, have_gil) __Pyx_INC_MEMVIEW(slice, have_gil, __LINE__) -#define __PYX_XCLEAR_MEMVIEW(slice, have_gil) __Pyx_XCLEAR_MEMVIEW(slice, have_gil, __LINE__) -static CYTHON_INLINE void __Pyx_INC_MEMVIEW(__Pyx_memviewslice *, int, int); -static CYTHON_INLINE void __Pyx_XCLEAR_MEMVIEW(__Pyx_memviewslice *, int, int); - -/* CIntFromPy.proto */ -static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); - -/* CIntFromPy.proto */ -static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *); - -/* CIntToPy.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int64_t(int64_t value); - -/* CIntToPy.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_uint64_t(uint64_t value); - -/* CIntToPy.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_char(unsigned char value); - -/* CIntFromPy.proto */ -static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); - -/* CIntToPy.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value); - -/* CIntToPy.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); - -/* CIntFromPy.proto */ -static CYTHON_INLINE char __Pyx_PyInt_As_char(PyObject *); - -/* FormatTypeName.proto */ -#if CYTHON_COMPILING_IN_LIMITED_API -typedef PyObject *__Pyx_TypeName; -#define __Pyx_FMT_TYPENAME "%U" -static __Pyx_TypeName __Pyx_PyType_GetName(PyTypeObject* tp); -#define __Pyx_DECREF_TypeName(obj) Py_XDECREF(obj) -#else -typedef const char *__Pyx_TypeName; -#define __Pyx_FMT_TYPENAME "%.200s" -#define __Pyx_PyType_GetName(tp) ((tp)->tp_name) -#define __Pyx_DECREF_TypeName(obj) -#endif - -/* PyObjectCall2Args.proto */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_Call2Args(PyObject* function, PyObject* arg1, PyObject* arg2); - -/* PyObjectCallMethod1.proto */ -static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg); - -/* CoroutineBase.proto */ -struct __pyx_CoroutineObject; -typedef PyObject *(*__pyx_coroutine_body_t)(struct __pyx_CoroutineObject *, PyThreadState *, PyObject *); -#if CYTHON_USE_EXC_INFO_STACK -#define __Pyx_ExcInfoStruct _PyErr_StackItem -#else -typedef struct { - PyObject *exc_type; - PyObject *exc_value; - PyObject *exc_traceback; -} __Pyx_ExcInfoStruct; -#endif -typedef struct __pyx_CoroutineObject { - PyObject_HEAD - __pyx_coroutine_body_t body; - PyObject *closure; - __Pyx_ExcInfoStruct gi_exc_state; - PyObject *gi_weakreflist; - PyObject *classobj; - PyObject *yieldfrom; - PyObject *gi_name; - PyObject *gi_qualname; - PyObject *gi_modulename; - PyObject *gi_code; - PyObject *gi_frame; - int resume_label; - char is_running; -} __pyx_CoroutineObject; -static __pyx_CoroutineObject *__Pyx__Coroutine_New( - PyTypeObject *type, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, - PyObject *name, PyObject *qualname, PyObject *module_name); -static __pyx_CoroutineObject *__Pyx__Coroutine_NewInit( - __pyx_CoroutineObject *gen, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, - PyObject *name, PyObject *qualname, PyObject *module_name); -static CYTHON_INLINE void __Pyx_Coroutine_ExceptionClear(__Pyx_ExcInfoStruct *self); -static int __Pyx_Coroutine_clear(PyObject *self); -static PyObject *__Pyx_Coroutine_Send(PyObject *self, PyObject *value); -static PyObject *__Pyx_Coroutine_Close(PyObject *self); -static PyObject *__Pyx_Coroutine_Throw(PyObject *gen, PyObject *args); -#if CYTHON_USE_EXC_INFO_STACK -#define __Pyx_Coroutine_SwapException(self) -#define __Pyx_Coroutine_ResetAndClearException(self) __Pyx_Coroutine_ExceptionClear(&(self)->gi_exc_state) -#else -#define __Pyx_Coroutine_SwapException(self) {\ - __Pyx_ExceptionSwap(&(self)->gi_exc_state.exc_type, &(self)->gi_exc_state.exc_value, &(self)->gi_exc_state.exc_traceback);\ - __Pyx_Coroutine_ResetFrameBackpointer(&(self)->gi_exc_state);\ - } -#define __Pyx_Coroutine_ResetAndClearException(self) {\ - __Pyx_ExceptionReset((self)->gi_exc_state.exc_type, (self)->gi_exc_state.exc_value, (self)->gi_exc_state.exc_traceback);\ - (self)->gi_exc_state.exc_type = (self)->gi_exc_state.exc_value = (self)->gi_exc_state.exc_traceback = NULL;\ - } -#endif -#if CYTHON_FAST_THREAD_STATE -#define __Pyx_PyGen_FetchStopIterationValue(pvalue)\ - __Pyx_PyGen__FetchStopIterationValue(__pyx_tstate, pvalue) -#else -#define __Pyx_PyGen_FetchStopIterationValue(pvalue)\ - __Pyx_PyGen__FetchStopIterationValue(__Pyx_PyThreadState_Current, pvalue) -#endif -static int __Pyx_PyGen__FetchStopIterationValue(PyThreadState *tstate, PyObject **pvalue); -static CYTHON_INLINE void __Pyx_Coroutine_ResetFrameBackpointer(__Pyx_ExcInfoStruct *exc_state); - -/* PatchModuleWithCoroutine.proto */ -static PyObject* __Pyx_Coroutine_patch_module(PyObject* module, const char* py_code); - -/* PatchGeneratorABC.proto */ -static int __Pyx_patch_abc(void); - -/* Generator.proto */ -#define __Pyx_Generator_USED -#define __Pyx_Generator_CheckExact(obj) __Pyx_IS_TYPE(obj, __pyx_GeneratorType) -#define __Pyx_Generator_New(body, code, closure, name, qualname, module_name)\ - __Pyx__Coroutine_New(__pyx_GeneratorType, body, code, closure, name, qualname, module_name) -static PyObject *__Pyx_Generator_Next(PyObject *self); -static int __pyx_Generator_init(PyObject *module); - -/* CheckBinaryVersion.proto */ -static int __Pyx_check_binary_version(void); - -/* InitStrings.proto */ -static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); - -/* #### Code section: module_declarations ### */ -static PyObject *__pyx_array_get_memview(struct __pyx_array_obj *__pyx_v_self); /* proto*/ -static char *__pyx_memoryview_get_item_pointer(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index); /* proto*/ -static PyObject *__pyx_memoryview_is_slice(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_obj); /* proto*/ -static PyObject *__pyx_memoryview_setitem_slice_assignment(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_dst, PyObject *__pyx_v_src); /* proto*/ -static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memoryview_obj *__pyx_v_self, struct __pyx_memoryview_obj *__pyx_v_dst, PyObject *__pyx_v_value); /* proto*/ -static PyObject *__pyx_memoryview_setitem_indexed(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index, PyObject *__pyx_v_value); /* proto*/ -static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview_obj *__pyx_v_self, char *__pyx_v_itemp); /* proto*/ -static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryview_obj *__pyx_v_self, char *__pyx_v_itemp, PyObject *__pyx_v_value); /* proto*/ -static PyObject *__pyx_memoryview__get_base(struct __pyx_memoryview_obj *__pyx_v_self); /* proto*/ -static PyObject *__pyx_memoryviewslice_convert_item_to_object(struct __pyx_memoryviewslice_obj *__pyx_v_self, char *__pyx_v_itemp); /* proto*/ -static PyObject *__pyx_memoryviewslice_assign_item_from_object(struct __pyx_memoryviewslice_obj *__pyx_v_self, char *__pyx_v_itemp, PyObject *__pyx_v_value); /* proto*/ -static PyObject *__pyx_memoryviewslice__get_base(struct __pyx_memoryviewslice_obj *__pyx_v_self); /* proto*/ -static CYTHON_INLINE PyObject *__pyx_f_9csimdjson_11ArrayBuffer_from_element(simdjson::dom::array __pyx_v_src, PyObject *__pyx_v_of_type); /* proto*/ -static CYTHON_INLINE PyObject *__pyx_f_9csimdjson_5Array_from_element(struct __pyx_obj_9csimdjson_Parser *__pyx_v_parser, simdjson::dom::element __pyx_v_src); /* proto*/ -static CYTHON_INLINE PyObject *__pyx_f_9csimdjson_6Object_from_element(struct __pyx_obj_9csimdjson_Parser *__pyx_v_parser, simdjson::dom::element __pyx_v_src); /* proto*/ - -/* Module declarations from "libcpp" */ - -/* Module declarations from "libcpp.memory" */ - -/* Module declarations from "libc.string" */ - -/* Module declarations from "libc.stdio" */ - -/* Module declarations from "__builtin__" */ - -/* Module declarations from "cpython.type" */ - -/* Module declarations from "cpython" */ - -/* Module declarations from "cpython.object" */ - -/* Module declarations from "cpython.ref" */ - -/* Module declarations from "cpython.list" */ - -/* Module declarations from "cpython.bytes" */ - -/* Module declarations from "cpython.slice" */ - -/* Module declarations from "cpython.mem" */ - -/* Module declarations from "cpython.buffer" */ - -/* Module declarations from "libc.stdint" */ - -/* Module declarations from "libcpp.string" */ - -/* Module declarations from "simdjson.csimdjson" */ - -/* Module declarations from "csimdjson" */ -static PyObject *__pyx_collections_abc_Sequence = 0; -static PyObject *generic = 0; -static PyObject *strided = 0; -static PyObject *indirect = 0; -static PyObject *contiguous = 0; -static PyObject *indirect_contiguous = 0; -static int __pyx_memoryview_thread_locks_used; -static PyThread_type_lock __pyx_memoryview_thread_locks[8]; -static PyObject *__pyx_f_9csimdjson_str_as_bytes(PyObject *); /*proto*/ -static PyObject *__pyx_f_9csimdjson_object_to_dict(struct __pyx_obj_9csimdjson_Parser *, simdjson::dom::object, int); /*proto*/ -static PyObject *__pyx_f_9csimdjson_array_to_list(struct __pyx_obj_9csimdjson_Parser *, simdjson::dom::array, int); /*proto*/ -static CYTHON_INLINE PyObject *__pyx_f_9csimdjson_element_to_primitive(struct __pyx_obj_9csimdjson_Parser *, simdjson::dom::element, struct __pyx_opt_args_9csimdjson_element_to_primitive *__pyx_optional_args); /*proto*/ -static CYTHON_INLINE PyObject *__pyx_convert_PyObject_string_to_py_std__in_string(std::string const &); /*proto*/ -static CYTHON_INLINE PyObject *__pyx_convert_PyUnicode_string_to_py_std__in_string(std::string const &); /*proto*/ -static CYTHON_INLINE PyObject *__pyx_convert_PyStr_string_to_py_std__in_string(std::string const &); /*proto*/ -static CYTHON_INLINE PyObject *__pyx_convert_PyBytes_string_to_py_std__in_string(std::string const &); /*proto*/ -static CYTHON_INLINE PyObject *__pyx_convert_PyByteArray_string_to_py_std__in_string(std::string const &); /*proto*/ -static int __pyx_array_allocate_buffer(struct __pyx_array_obj *); /*proto*/ -static struct __pyx_array_obj *__pyx_array_new(PyObject *, Py_ssize_t, char *, char *, char *); /*proto*/ -static PyObject *__pyx_memoryview_new(PyObject *, int, int, __Pyx_TypeInfo *); /*proto*/ -static CYTHON_INLINE int __pyx_memoryview_check(PyObject *); /*proto*/ -static PyObject *_unellipsify(PyObject *, int); /*proto*/ -static int assert_direct_dimensions(Py_ssize_t *, int); /*proto*/ -static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_obj *, PyObject *); /*proto*/ -static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *, Py_ssize_t, Py_ssize_t, Py_ssize_t, int, int, int *, Py_ssize_t, Py_ssize_t, Py_ssize_t, int, int, int, int); /*proto*/ -static char *__pyx_pybuffer_index(Py_buffer *, char *, Py_ssize_t, Py_ssize_t); /*proto*/ -static int __pyx_memslice_transpose(__Pyx_memviewslice *); /*proto*/ -static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice, int, PyObject *(*)(char *), int (*)(char *, PyObject *), int); /*proto*/ -static __Pyx_memviewslice *__pyx_memoryview_get_slice_from_memoryview(struct __pyx_memoryview_obj *, __Pyx_memviewslice *); /*proto*/ -static void __pyx_memoryview_slice_copy(struct __pyx_memoryview_obj *, __Pyx_memviewslice *); /*proto*/ -static PyObject *__pyx_memoryview_copy_object(struct __pyx_memoryview_obj *); /*proto*/ -static PyObject *__pyx_memoryview_copy_object_from_slice(struct __pyx_memoryview_obj *, __Pyx_memviewslice *); /*proto*/ -static Py_ssize_t abs_py_ssize_t(Py_ssize_t); /*proto*/ -static char __pyx_get_best_slice_order(__Pyx_memviewslice *, int); /*proto*/ -static void _copy_strided_to_strided(char *, Py_ssize_t *, char *, Py_ssize_t *, Py_ssize_t *, Py_ssize_t *, int, size_t); /*proto*/ -static void copy_strided_to_strided(__Pyx_memviewslice *, __Pyx_memviewslice *, int, size_t); /*proto*/ -static Py_ssize_t __pyx_memoryview_slice_get_size(__Pyx_memviewslice *, int); /*proto*/ -static Py_ssize_t __pyx_fill_contig_strides_array(Py_ssize_t *, Py_ssize_t *, Py_ssize_t, int, char); /*proto*/ -static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *, __Pyx_memviewslice *, char, int); /*proto*/ -static int __pyx_memoryview_err_extents(int, Py_ssize_t, Py_ssize_t); /*proto*/ -static int __pyx_memoryview_err_dim(PyObject *, PyObject *, int); /*proto*/ -static int __pyx_memoryview_err(PyObject *, PyObject *); /*proto*/ -static int __pyx_memoryview_err_no_memory(void); /*proto*/ -static int __pyx_memoryview_copy_contents(__Pyx_memviewslice, __Pyx_memviewslice, int, int, int); /*proto*/ -static void __pyx_memoryview_broadcast_leading(__Pyx_memviewslice *, int, int); /*proto*/ -static void __pyx_memoryview_refcount_copying(__Pyx_memviewslice *, int, int, int); /*proto*/ -static void __pyx_memoryview_refcount_objects_in_slice_with_gil(char *, Py_ssize_t *, Py_ssize_t *, int, int); /*proto*/ -static void __pyx_memoryview_refcount_objects_in_slice(char *, Py_ssize_t *, Py_ssize_t *, int, int); /*proto*/ -static void __pyx_memoryview_slice_assign_scalar(__Pyx_memviewslice *, int, size_t, void *, int); /*proto*/ -static void __pyx_memoryview__slice_assign_scalar(char *, Py_ssize_t *, Py_ssize_t *, int, size_t, void *); /*proto*/ -static PyObject *__pyx_unpickle_Enum__set_state(struct __pyx_MemviewEnum_obj *, PyObject *); /*proto*/ -/* #### Code section: typeinfo ### */ -static __Pyx_TypeInfo __Pyx_TypeInfo_unsigned_char__const__ = { "const unsigned char", NULL, sizeof(unsigned char const ), { 0 }, 0, __PYX_IS_UNSIGNED(unsigned char const ) ? 'U' : 'I', __PYX_IS_UNSIGNED(unsigned char const ), 0 }; -/* #### Code section: before_global_var ### */ -#define __Pyx_MODULE_NAME "csimdjson" -extern int __pyx_module_is_main_csimdjson; -int __pyx_module_is_main_csimdjson = 0; - -/* Implementation of "csimdjson" */ -/* #### Code section: global_var ### */ -static PyObject *__pyx_builtin_ValueError; -static PyObject *__pyx_builtin_MemoryError; -static PyObject *__pyx_builtin_TypeError; -static PyObject *__pyx_builtin_enumerate; -static PyObject *__pyx_builtin_range; -static PyObject *__pyx_builtin_KeyError; -static PyObject *__pyx_builtin_RuntimeError; -static PyObject *__pyx_builtin___import__; -static PyObject *__pyx_builtin_AssertionError; -static PyObject *__pyx_builtin_Ellipsis; -static PyObject *__pyx_builtin_id; -static PyObject *__pyx_builtin_IndexError; -/* #### Code section: string_decls ### */ -static const char __pyx_k_[] = ": "; -static const char __pyx_k_O[] = "O"; -static const char __pyx_k_c[] = "c"; -static const char __pyx_k_d[] = "d"; -static const char __pyx_k_i[] = "i"; -static const char __pyx_k_u[] = "u"; -static const char __pyx_k__2[] = "."; -static const char __pyx_k__3[] = "*"; -static const char __pyx_k__6[] = "'"; -static const char __pyx_k__7[] = ")"; -static const char __pyx_k_gc[] = "gc"; -static const char __pyx_k_id[] = "id"; -static const char __pyx_k_it[] = "it"; -static const char __pyx_k__59[] = "?"; -static const char __pyx_k_abc[] = "abc"; -static const char __pyx_k_and[] = " and "; -static const char __pyx_k_get[] = "get"; -static const char __pyx_k_got[] = " (got "; -static const char __pyx_k_key[] = "key"; -static const char __pyx_k_new[] = "__new__"; -static const char __pyx_k_obj[] = "obj"; -static const char __pyx_k_src[] = "src"; -static const char __pyx_k_sys[] = "sys"; -static const char __pyx_k_Path[] = "Path"; -static const char __pyx_k_args[] = "args"; -static const char __pyx_k_base[] = "base"; -static const char __pyx_k_data[] = "data"; -static const char __pyx_k_dict[] = "__dict__"; -static const char __pyx_k_impl[] = "impl"; -static const char __pyx_k_iter[] = "__iter__"; -static const char __pyx_k_keys[] = "keys"; -static const char __pyx_k_load[] = "load"; -static const char __pyx_k_main[] = "__main__"; -static const char __pyx_k_mode[] = "mode"; -static const char __pyx_k_name[] = "name"; -static const char __pyx_k_ndim[] = "ndim"; -static const char __pyx_k_pack[] = "pack"; -static const char __pyx_k_path[] = "path"; -static const char __pyx_k_self[] = "self"; -static const char __pyx_k_send[] = "send"; -static const char __pyx_k_size[] = "size"; -static const char __pyx_k_spec[] = "__spec__"; -static const char __pyx_k_step[] = "step"; -static const char __pyx_k_stop[] = "stop"; -static const char __pyx_k_test[] = "__test__"; -static const char __pyx_k_ASCII[] = "ASCII"; -static const char __pyx_k_Array[] = "Array"; -static const char __pyx_k_class[] = "__class__"; -static const char __pyx_k_close[] = "close"; -static const char __pyx_k_count[] = "count"; -static const char __pyx_k_error[] = "error"; -static const char __pyx_k_flags[] = "flags"; -static const char __pyx_k_index[] = "index"; -static const char __pyx_k_items[] = "items"; -static const char __pyx_k_parse[] = "parse"; -static const char __pyx_k_range[] = "range"; -static const char __pyx_k_shape[] = "shape"; -static const char __pyx_k_start[] = "start"; -static const char __pyx_k_throw[] = "throw"; -static const char __pyx_k_utf_8[] = "utf-8"; -static const char __pyx_k_Object[] = "Object"; -static const char __pyx_k_Parser[] = "Parser"; -static const char __pyx_k_enable[] = "enable"; -static const char __pyx_k_encode[] = "encode"; -static const char __pyx_k_format[] = "format"; -static const char __pyx_k_import[] = "__import__"; -static const char __pyx_k_name_2[] = "__name__"; -static const char __pyx_k_pickle[] = "pickle"; -static const char __pyx_k_reduce[] = "__reduce__"; -static const char __pyx_k_struct[] = "struct"; -static const char __pyx_k_unpack[] = "unpack"; -static const char __pyx_k_update[] = "update"; -static const char __pyx_k_values[] = "values"; -static const char __pyx_k_PADDING[] = "PADDING"; -static const char __pyx_k_VERSION[] = "VERSION"; -static const char __pyx_k_as_dict[] = "as_dict"; -static const char __pyx_k_as_list[] = "as_list"; -static const char __pyx_k_default[] = "default"; -static const char __pyx_k_disable[] = "disable"; -static const char __pyx_k_fortran[] = "fortran"; -static const char __pyx_k_memview[] = "memview"; -static const char __pyx_k_of_type[] = "of_type"; -static const char __pyx_k_pathlib[] = "pathlib"; -static const char __pyx_k_Ellipsis[] = "Ellipsis"; -static const char __pyx_k_KeyError[] = "KeyError"; -static const char __pyx_k_Sequence[] = "Sequence"; -static const char __pyx_k_document[] = "document"; -static const char __pyx_k_getstate[] = "__getstate__"; -static const char __pyx_k_itemsize[] = "itemsize"; -static const char __pyx_k_pyx_type[] = "__pyx_type"; -static const char __pyx_k_register[] = "register"; -static const char __pyx_k_setstate[] = "__setstate__"; -static const char __pyx_k_str_data[] = "str_data"; -static const char __pyx_k_str_size[] = "str_size"; -static const char __pyx_k_TypeError[] = "TypeError"; -static const char __pyx_k_as_buffer[] = "as_buffer"; -static const char __pyx_k_csimdjson[] = "csimdjson"; -static const char __pyx_k_enumerate[] = "enumerate"; -static const char __pyx_k_isenabled[] = "isenabled"; -static const char __pyx_k_pyx_state[] = "__pyx_state"; -static const char __pyx_k_recursive[] = "recursive"; -static const char __pyx_k_reduce_ex[] = "__reduce_ex__"; -static const char __pyx_k_IndexError[] = "IndexError"; -static const char __pyx_k_Object_get[] = "Object.get"; -static const char __pyx_k_ValueError[] = "ValueError"; -static const char __pyx_k_at_pointer[] = "at_pointer"; -static const char __pyx_k_bytes_data[] = "bytes_data"; -static const char __pyx_k_pyx_result[] = "__pyx_result"; -static const char __pyx_k_pyx_vtable[] = "__pyx_vtable__"; -static const char __pyx_k_ArrayBuffer[] = "ArrayBuffer"; -static const char __pyx_k_MemoryError[] = "MemoryError"; -static const char __pyx_k_Parser_load[] = "Parser.load"; -static const char __pyx_k_PickleError[] = "PickleError"; -static const char __pyx_k_collections[] = "collections"; -static const char __pyx_k_Array___iter[] = "Array.__iter__"; -static const char __pyx_k_Object_items[] = "Object.items"; -static const char __pyx_k_Parser_parse[] = "Parser.parse"; -static const char __pyx_k_RuntimeError[] = "RuntimeError"; -static const char __pyx_k_initializing[] = "_initializing"; -static const char __pyx_k_is_coroutine[] = "_is_coroutine"; -static const char __pyx_k_json_pointer[] = "json_pointer"; -static const char __pyx_k_max_capacity[] = "max_capacity"; -static const char __pyx_k_pyx_checksum[] = "__pyx_checksum"; -static const char __pyx_k_stringsource[] = ""; -static const char __pyx_k_version_info[] = "version_info"; -static const char __pyx_k_Array_as_list[] = "Array.as_list"; -static const char __pyx_k_MAXSIZE_BYTES[] = "MAXSIZE_BYTES"; -static const char __pyx_k_Object___iter[] = "Object.__iter__"; -static const char __pyx_k_Object_values[] = "Object.values"; -static const char __pyx_k_class_getitem[] = "__class_getitem__"; -static const char __pyx_k_reduce_cython[] = "__reduce_cython__"; -static const char __pyx_k_AssertionError[] = "AssertionError"; -static const char __pyx_k_Object_as_dict[] = "Object.as_dict"; -static const char __pyx_k_Array_as_buffer[] = "Array.as_buffer"; -static const char __pyx_k_View_MemoryView[] = "View.MemoryView"; -static const char __pyx_k_allocate_buffer[] = "allocate_buffer"; -static const char __pyx_k_collections_abc[] = "collections.abc"; -static const char __pyx_k_dtype_is_object[] = "dtype_is_object"; -static const char __pyx_k_pyx_PickleError[] = "__pyx_PickleError"; -static const char __pyx_k_setstate_cython[] = "__setstate_cython__"; -static const char __pyx_k_Array_at_pointer[] = "Array.at_pointer"; -static const char __pyx_k_Object_at_pointer[] = "Object.at_pointer"; -static const char __pyx_k_pyx_unpickle_Enum[] = "__pyx_unpickle_Enum"; -static const char __pyx_k_asyncio_coroutines[] = "asyncio.coroutines"; -static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback"; -static const char __pyx_k_strided_and_direct[] = ""; -static const char __pyx_k_EMPTY_no_JSON_found[] = "EMPTY: no JSON found"; -static const char __pyx_k_get_implementations[] = "get_implementations"; -static const char __pyx_k_strided_and_indirect[] = ""; -static const char __pyx_k_supported_by_runtime[] = "supported_by_runtime"; -static const char __pyx_k_Array___reduce_cython[] = "Array.__reduce_cython__"; -static const char __pyx_k_Invalid_shape_in_axis[] = "Invalid shape in axis "; -static const char __pyx_k_contiguous_and_direct[] = ""; -static const char __pyx_k_Cannot_index_with_type[] = "Cannot index with type '"; -static const char __pyx_k_MemoryView_of_r_object[] = ""; -static const char __pyx_k_Object___reduce_cython[] = "Object.__reduce_cython__"; -static const char __pyx_k_Parser___reduce_cython[] = "Parser.__reduce_cython__"; -static const char __pyx_k_Unknown_Implementation[] = "Unknown Implementation"; -static const char __pyx_k_simdjson_csimdjson_pyx[] = "simdjson/csimdjson.pyx"; -static const char __pyx_k_Array___setstate_cython[] = "Array.__setstate_cython__"; -static const char __pyx_k_MemoryView_of_r_at_0x_x[] = ""; -static const char __pyx_k_contiguous_and_indirect[] = ""; -static const char __pyx_k_Object___setstate_cython[] = "Object.__setstate_cython__"; -static const char __pyx_k_Parser___setstate_cython[] = "Parser.__setstate_cython__"; -static const char __pyx_k_Dimension_d_is_not_direct[] = "Dimension %d is not direct"; -static const char __pyx_k_Index_out_of_bounds_axis_d[] = "Index out of bounds (axis %d)"; -static const char __pyx_k_Parser_get_implementations[] = "Parser.get_implementations"; -static const char __pyx_k_ArrayBuffer___reduce_cython[] = "ArrayBuffer.__reduce_cython__"; -static const char __pyx_k_Step_may_not_be_zero_axis_d[] = "Step may not be zero (axis %d)"; -static const char __pyx_k_itemsize_0_for_cython_array[] = "itemsize <= 0 for cython.array"; -static const char __pyx_k_of_type_must_be_one_of_d_i_u[] = "of_type must be one of {d,i,u}."; -static const char __pyx_k_ArrayBuffer___setstate_cython[] = "ArrayBuffer.__setstate_cython__"; -static const char __pyx_k_unable_to_allocate_array_data[] = "unable to allocate array data."; -static const char __pyx_k_strided_and_direct_or_indirect[] = ""; -static const char __pyx_k_All_dimensions_preceding_dimensi[] = "All dimensions preceding dimension %d must be indexed and not sliced"; -static const char __pyx_k_Attempted_to_set_a_runtime_Imple[] = "Attempted to set a runtime Implementation that is notsupported on the current host."; -static const char __pyx_k_Buffer_view_does_not_expose_stri[] = "Buffer view does not expose strides"; -static const char __pyx_k_Can_only_create_a_buffer_that_is[] = "Can only create a buffer that is contiguous in memory."; -static const char __pyx_k_Cannot_assign_to_read_only_memor[] = "Cannot assign to read-only memoryview"; -static const char __pyx_k_Cannot_create_writable_memory_vi[] = "Cannot create writable memory view from read-only memoryview"; -static const char __pyx_k_Cannot_transpose_memoryview_with[] = "Cannot transpose memoryview with indirect dimensions"; -static const char __pyx_k_Empty_shape_tuple_for_cython_arr[] = "Empty shape tuple for cython.array"; -static const char __pyx_k_Encountered_an_unknown_element_t[] = "Encountered an unknown element_type."; -static const char __pyx_k_Incompatible_checksums_0x_x_vs_0[] = "Incompatible checksums (0x%x vs (0x82a3537, 0x6ae9995, 0xb068931) = (name))"; -static const char __pyx_k_Indirect_dimensions_not_supporte[] = "Indirect dimensions not supported"; -static const char __pyx_k_Invalid_mode_expected_c_or_fortr[] = "Invalid mode, expected 'c' or 'fortran', got "; -static const char __pyx_k_Out_of_bounds_on_buffer_access_a[] = "Out of bounds on buffer access (axis "; -static const char __pyx_k_Tried_to_re_use_a_parser_while_s[] = "Tried to re-use a parser while simdjson.Object and/or simdjson.Array objects still exist referencing the old parser."; -static const char __pyx_k_Unable_to_convert_item_to_object[] = "Unable to convert item to object"; -static const char __pyx_k_got_differing_extents_in_dimensi[] = "got differing extents in dimension "; -static const char __pyx_k_no_default___reduce___due_to_non[] = "no default __reduce__ due to non-trivial __cinit__"; -static const char __pyx_k_self_c_element_self_c_parser_can[] = "self.c_element,self.c_parser cannot be converted to a Python object for pickling"; -static const char __pyx_k_unable_to_allocate_shape_and_str[] = "unable to allocate shape and strides."; -/* #### Code section: decls ### */ -static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_shape, Py_ssize_t __pyx_v_itemsize, PyObject *__pyx_v_format, PyObject *__pyx_v_mode, int __pyx_v_allocate_buffer); /* proto */ -static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(struct __pyx_array_obj *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */ -static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struct __pyx_array_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_15View_dot_MemoryView_5array_7memview___get__(struct __pyx_array_obj *__pyx_v_self); /* proto */ -static Py_ssize_t __pyx_array___pyx_pf_15View_dot_MemoryView_5array_6__len__(struct __pyx_array_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_array___pyx_pf_15View_dot_MemoryView_5array_8__getattr__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_attr); /* proto */ -static PyObject *__pyx_array___pyx_pf_15View_dot_MemoryView_5array_10__getitem__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_item); /* proto */ -static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_12__setitem__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_item, PyObject *__pyx_v_value); /* proto */ -static PyObject *__pyx_pf___pyx_array___reduce_cython__(CYTHON_UNUSED struct __pyx_array_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf___pyx_array_2__setstate_cython__(CYTHON_UNUSED struct __pyx_array_obj *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state); /* proto */ -static int __pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum___init__(struct __pyx_MemviewEnum_obj *__pyx_v_self, PyObject *__pyx_v_name); /* proto */ -static PyObject *__pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum_2__repr__(struct __pyx_MemviewEnum_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf___pyx_MemviewEnum___reduce_cython__(struct __pyx_MemviewEnum_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf___pyx_MemviewEnum_2__setstate_cython__(struct __pyx_MemviewEnum_obj *__pyx_v_self, PyObject *__pyx_v___pyx_state); /* proto */ -static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit__(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_obj, int __pyx_v_flags, int __pyx_v_dtype_is_object); /* proto */ -static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__dealloc__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_4__getitem__(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index); /* proto */ -static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_6__setitem__(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index, PyObject *__pyx_v_value); /* proto */ -static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbuffer__(struct __pyx_memoryview_obj *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */ -static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_1T___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4base___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_5shape___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_7strides___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_10suboffsets___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4ndim___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_8itemsize___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_6nbytes___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4size___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */ -static Py_ssize_t __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_10__len__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_12__repr__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_14__str__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_16is_c_contig(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_18is_f_contig(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_20copy(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_22copy_fortran(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf___pyx_memoryview___reduce_cython__(CYTHON_UNUSED struct __pyx_memoryview_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf___pyx_memoryview_2__setstate_cython__(CYTHON_UNUSED struct __pyx_memoryview_obj *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state); /* proto */ -static void __pyx_memoryviewslice___pyx_pf_15View_dot_MemoryView_16_memoryviewslice___dealloc__(struct __pyx_memoryviewslice_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf___pyx_memoryviewslice___reduce_cython__(CYTHON_UNUSED struct __pyx_memoryviewslice_obj *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf___pyx_memoryviewslice_2__setstate_cython__(CYTHON_UNUSED struct __pyx_memoryviewslice_obj *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state); /* proto */ -static PyObject *__pyx_pf_15View_dot_MemoryView___pyx_unpickle_Enum(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v___pyx_type, long __pyx_v___pyx_checksum, PyObject *__pyx_v___pyx_state); /* proto */ -static int __pyx_pf_9csimdjson_11ArrayBuffer___cinit__(struct __pyx_obj_9csimdjson_ArrayBuffer *__pyx_v_self); /* proto */ -static void __pyx_pf_9csimdjson_11ArrayBuffer_2__dealloc__(struct __pyx_obj_9csimdjson_ArrayBuffer *__pyx_v_self); /* proto */ -static int __pyx_pf_9csimdjson_11ArrayBuffer_4__getbuffer__(struct __pyx_obj_9csimdjson_ArrayBuffer *__pyx_v_self, Py_buffer *__pyx_v_buffer, int __pyx_v_flags); /* proto */ -static void __pyx_pf_9csimdjson_11ArrayBuffer_6__releasebuffer__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_ArrayBuffer *__pyx_v_self, CYTHON_UNUSED Py_buffer *__pyx_v_buffer); /* proto */ -static PyObject *__pyx_pf_9csimdjson_11ArrayBuffer_4size___get__(struct __pyx_obj_9csimdjson_ArrayBuffer *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_9csimdjson_11ArrayBuffer_8__reduce_cython__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_ArrayBuffer *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_9csimdjson_11ArrayBuffer_10__setstate_cython__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_ArrayBuffer *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state); /* proto */ -static PyObject *__pyx_pf_9csimdjson_5Array___getitem__(struct __pyx_obj_9csimdjson_Array *__pyx_v_self, PyObject *__pyx_v_key); /* proto */ -static Py_ssize_t __pyx_pf_9csimdjson_5Array_2__len__(struct __pyx_obj_9csimdjson_Array *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_9csimdjson_5Array_4__iter__(struct __pyx_obj_9csimdjson_Array *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_9csimdjson_5Array_7at_pointer(struct __pyx_obj_9csimdjson_Array *__pyx_v_self, PyObject *__pyx_v_json_pointer); /* proto */ -static PyObject *__pyx_pf_9csimdjson_5Array_9as_list(struct __pyx_obj_9csimdjson_Array *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_9csimdjson_5Array_11as_buffer(struct __pyx_obj_9csimdjson_Array *__pyx_v_self, PyObject *__pyx_v_of_type); /* proto */ -static PyObject *__pyx_pf_9csimdjson_5Array_4mini___get__(struct __pyx_obj_9csimdjson_Array *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_9csimdjson_5Array_6parser___get__(struct __pyx_obj_9csimdjson_Array *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_9csimdjson_5Array_13__reduce_cython__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_Array *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_9csimdjson_5Array_15__setstate_cython__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_Array *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state); /* proto */ -static PyObject *__pyx_pf_9csimdjson_6Object___getitem__(struct __pyx_obj_9csimdjson_Object *__pyx_v_self, PyObject *__pyx_v_key); /* proto */ -static PyObject *__pyx_pf_9csimdjson_6Object_2get(struct __pyx_obj_9csimdjson_Object *__pyx_v_self, PyObject *__pyx_v_key, PyObject *__pyx_v_default); /* proto */ -static Py_ssize_t __pyx_pf_9csimdjson_6Object_4__len__(struct __pyx_obj_9csimdjson_Object *__pyx_v_self); /* proto */ -static int __pyx_pf_9csimdjson_6Object_6__contains__(struct __pyx_obj_9csimdjson_Object *__pyx_v_self, PyObject *__pyx_v_key); /* proto */ -static PyObject *__pyx_pf_9csimdjson_6Object_8__iter__(struct __pyx_obj_9csimdjson_Object *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_9csimdjson_6Object_11values(struct __pyx_obj_9csimdjson_Object *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_9csimdjson_6Object_14items(struct __pyx_obj_9csimdjson_Object *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_9csimdjson_6Object_17at_pointer(struct __pyx_obj_9csimdjson_Object *__pyx_v_self, PyObject *__pyx_v_json_pointer); /* proto */ -static PyObject *__pyx_pf_9csimdjson_6Object_19as_dict(struct __pyx_obj_9csimdjson_Object *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_9csimdjson_6Object_4mini___get__(struct __pyx_obj_9csimdjson_Object *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_9csimdjson_6Object_6parser___get__(struct __pyx_obj_9csimdjson_Object *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_9csimdjson_6Object_21__reduce_cython__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_Object *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_9csimdjson_6Object_23__setstate_cython__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_Object *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state); /* proto */ -static int __pyx_pf_9csimdjson_6Parser___cinit__(struct __pyx_obj_9csimdjson_Parser *__pyx_v_self, size_t __pyx_v_max_capacity); /* proto */ -static void __pyx_pf_9csimdjson_6Parser_2__dealloc__(struct __pyx_obj_9csimdjson_Parser *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_9csimdjson_6Parser_4parse(struct __pyx_obj_9csimdjson_Parser *__pyx_v_self, PyObject *__pyx_v_src, int __pyx_v_recursive); /* proto */ -static PyObject *__pyx_pf_9csimdjson_6Parser_6load(struct __pyx_obj_9csimdjson_Parser *__pyx_v_self, PyObject *__pyx_v_path, int __pyx_v_recursive); /* proto */ -static PyObject *__pyx_pf_9csimdjson_6Parser_8get_implementations(CYTHON_UNUSED struct __pyx_obj_9csimdjson_Parser *__pyx_v_self, PyObject *__pyx_v_supported_by_runtime); /* proto */ -static PyObject *__pyx_pf_9csimdjson_6Parser_14implementation___get__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_Parser *__pyx_v_self); /* proto */ -static int __pyx_pf_9csimdjson_6Parser_14implementation_2__set__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_Parser *__pyx_v_self, PyObject *__pyx_v_name); /* proto */ -static PyObject *__pyx_pf_9csimdjson_6Parser_11__reduce_cython__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_Parser *__pyx_v_self); /* proto */ -static PyObject *__pyx_pf_9csimdjson_6Parser_13__setstate_cython__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_Parser *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state); /* proto */ -static PyObject *__pyx_tp_new_9csimdjson_ArrayBuffer(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static PyObject *__pyx_tp_new_9csimdjson_Array(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static PyObject *__pyx_tp_new_9csimdjson_Object(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static PyObject *__pyx_tp_new_9csimdjson_Parser(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static PyObject *__pyx_tp_new_9csimdjson___pyx_scope_struct____iter__(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static PyObject *__pyx_tp_new_9csimdjson___pyx_scope_struct_1___iter__(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static PyObject *__pyx_tp_new_9csimdjson___pyx_scope_struct_2_values(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static PyObject *__pyx_tp_new_9csimdjson___pyx_scope_struct_3_items(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static PyObject *__pyx_tp_new_9csimdjson___pyx_scope_struct_4_get_implementations(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static PyObject *__pyx_tp_new_array(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static PyObject *__pyx_tp_new_Enum(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static PyObject *__pyx_tp_new_memoryview(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -static PyObject *__pyx_tp_new__memoryviewslice(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/ -/* #### Code section: late_includes ### */ -/* #### Code section: module_state ### */ -typedef struct { - PyObject *__pyx_d; - PyObject *__pyx_b; - PyObject *__pyx_cython_runtime; - PyObject *__pyx_empty_tuple; - PyObject *__pyx_empty_bytes; - PyObject *__pyx_empty_unicode; - #ifdef __Pyx_CyFunction_USED - PyTypeObject *__pyx_CyFunctionType; - #endif - #ifdef __Pyx_FusedFunction_USED - PyTypeObject *__pyx_FusedFunctionType; - #endif - #ifdef __Pyx_Generator_USED - PyTypeObject *__pyx_GeneratorType; - #endif - #ifdef __Pyx_IterableCoroutine_USED - PyTypeObject *__pyx_IterableCoroutineType; - #endif - #ifdef __Pyx_Coroutine_USED - PyTypeObject *__pyx_CoroutineAwaitType; - #endif - #ifdef __Pyx_Coroutine_USED - PyTypeObject *__pyx_CoroutineType; - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - PyTypeObject *__pyx_ptype_7cpython_4type_type; - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - #endif - #if CYTHON_USE_MODULE_STATE - PyObject *__pyx_type_9csimdjson_ArrayBuffer; - PyObject *__pyx_type_9csimdjson_Array; - PyObject *__pyx_type_9csimdjson_Object; - PyObject *__pyx_type_9csimdjson_Parser; - PyObject *__pyx_type_9csimdjson___pyx_scope_struct____iter__; - PyObject *__pyx_type_9csimdjson___pyx_scope_struct_1___iter__; - PyObject *__pyx_type_9csimdjson___pyx_scope_struct_2_values; - PyObject *__pyx_type_9csimdjson___pyx_scope_struct_3_items; - PyObject *__pyx_type_9csimdjson___pyx_scope_struct_4_get_implementations; - PyObject *__pyx_type___pyx_array; - PyObject *__pyx_type___pyx_MemviewEnum; - PyObject *__pyx_type___pyx_memoryview; - PyObject *__pyx_type___pyx_memoryviewslice; - #endif - PyTypeObject *__pyx_ptype_9csimdjson_ArrayBuffer; - PyTypeObject *__pyx_ptype_9csimdjson_Array; - PyTypeObject *__pyx_ptype_9csimdjson_Object; - PyTypeObject *__pyx_ptype_9csimdjson_Parser; - PyTypeObject *__pyx_ptype_9csimdjson___pyx_scope_struct____iter__; - PyTypeObject *__pyx_ptype_9csimdjson___pyx_scope_struct_1___iter__; - PyTypeObject *__pyx_ptype_9csimdjson___pyx_scope_struct_2_values; - PyTypeObject *__pyx_ptype_9csimdjson___pyx_scope_struct_3_items; - PyTypeObject *__pyx_ptype_9csimdjson___pyx_scope_struct_4_get_implementations; - PyTypeObject *__pyx_array_type; - PyTypeObject *__pyx_MemviewEnum_type; - PyTypeObject *__pyx_memoryview_type; - PyTypeObject *__pyx_memoryviewslice_type; - PyObject *__pyx_kp_u_; - PyObject *__pyx_n_s_ASCII; - PyObject *__pyx_kp_s_All_dimensions_preceding_dimensi; - PyObject *__pyx_n_s_Array; - PyObject *__pyx_n_s_ArrayBuffer; - PyObject *__pyx_n_s_ArrayBuffer___reduce_cython; - PyObject *__pyx_n_s_ArrayBuffer___setstate_cython; - PyObject *__pyx_n_s_Array___iter; - PyObject *__pyx_n_s_Array___reduce_cython; - PyObject *__pyx_n_s_Array___setstate_cython; - PyObject *__pyx_n_s_Array_as_buffer; - PyObject *__pyx_n_s_Array_as_list; - PyObject *__pyx_n_s_Array_at_pointer; - PyObject *__pyx_n_s_AssertionError; - PyObject *__pyx_kp_u_Attempted_to_set_a_runtime_Imple; - PyObject *__pyx_kp_s_Buffer_view_does_not_expose_stri; - PyObject *__pyx_kp_s_Can_only_create_a_buffer_that_is; - PyObject *__pyx_kp_s_Cannot_assign_to_read_only_memor; - PyObject *__pyx_kp_s_Cannot_create_writable_memory_vi; - PyObject *__pyx_kp_u_Cannot_index_with_type; - PyObject *__pyx_kp_s_Cannot_transpose_memoryview_with; - PyObject *__pyx_kp_s_Dimension_d_is_not_direct; - PyObject *__pyx_kp_u_EMPTY_no_JSON_found; - PyObject *__pyx_n_s_Ellipsis; - PyObject *__pyx_kp_s_Empty_shape_tuple_for_cython_arr; - PyObject *__pyx_kp_u_Encountered_an_unknown_element_t; - PyObject *__pyx_kp_s_Incompatible_checksums_0x_x_vs_0; - PyObject *__pyx_n_s_IndexError; - PyObject *__pyx_kp_s_Index_out_of_bounds_axis_d; - PyObject *__pyx_kp_s_Indirect_dimensions_not_supporte; - PyObject *__pyx_kp_u_Invalid_mode_expected_c_or_fortr; - PyObject *__pyx_kp_u_Invalid_shape_in_axis; - PyObject *__pyx_n_s_KeyError; - PyObject *__pyx_n_s_MAXSIZE_BYTES; - PyObject *__pyx_n_s_MemoryError; - PyObject *__pyx_kp_s_MemoryView_of_r_at_0x_x; - PyObject *__pyx_kp_s_MemoryView_of_r_object; - PyObject *__pyx_n_b_O; - PyObject *__pyx_n_s_Object; - PyObject *__pyx_n_s_Object___iter; - PyObject *__pyx_n_s_Object___reduce_cython; - PyObject *__pyx_n_s_Object___setstate_cython; - PyObject *__pyx_n_s_Object_as_dict; - PyObject *__pyx_n_s_Object_at_pointer; - PyObject *__pyx_n_s_Object_get; - PyObject *__pyx_n_s_Object_items; - PyObject *__pyx_n_s_Object_values; - PyObject *__pyx_kp_u_Out_of_bounds_on_buffer_access_a; - PyObject *__pyx_n_s_PADDING; - PyObject *__pyx_n_s_Parser; - PyObject *__pyx_n_s_Parser___reduce_cython; - PyObject *__pyx_n_s_Parser___setstate_cython; - PyObject *__pyx_n_s_Parser_get_implementations; - PyObject *__pyx_n_s_Parser_load; - PyObject *__pyx_n_s_Parser_parse; - PyObject *__pyx_n_s_Path; - PyObject *__pyx_n_s_PickleError; - PyObject *__pyx_n_s_RuntimeError; - PyObject *__pyx_n_s_Sequence; - PyObject *__pyx_kp_s_Step_may_not_be_zero_axis_d; - PyObject *__pyx_kp_u_Tried_to_re_use_a_parser_while_s; - PyObject *__pyx_n_s_TypeError; - PyObject *__pyx_kp_s_Unable_to_convert_item_to_object; - PyObject *__pyx_kp_u_Unknown_Implementation; - PyObject *__pyx_n_s_VERSION; - PyObject *__pyx_n_s_ValueError; - PyObject *__pyx_n_s_View_MemoryView; - PyObject *__pyx_kp_u__2; - PyObject *__pyx_n_s__3; - PyObject *__pyx_n_s__59; - PyObject *__pyx_kp_u__6; - PyObject *__pyx_kp_u__7; - PyObject *__pyx_n_s_abc; - PyObject *__pyx_n_s_allocate_buffer; - PyObject *__pyx_kp_u_and; - PyObject *__pyx_n_s_args; - PyObject *__pyx_n_s_as_buffer; - PyObject *__pyx_n_s_as_dict; - PyObject *__pyx_n_s_as_list; - PyObject *__pyx_n_s_asyncio_coroutines; - PyObject *__pyx_n_s_at_pointer; - PyObject *__pyx_n_s_base; - PyObject *__pyx_n_s_bytes_data; - PyObject *__pyx_n_s_c; - PyObject *__pyx_n_u_c; - PyObject *__pyx_n_s_class; - PyObject *__pyx_n_s_class_getitem; - PyObject *__pyx_n_s_cline_in_traceback; - PyObject *__pyx_n_s_close; - PyObject *__pyx_n_s_collections; - PyObject *__pyx_kp_s_collections_abc; - PyObject *__pyx_kp_s_contiguous_and_direct; - PyObject *__pyx_kp_s_contiguous_and_indirect; - PyObject *__pyx_n_s_count; - PyObject *__pyx_n_s_csimdjson; - PyObject *__pyx_n_u_d; - PyObject *__pyx_n_s_data; - PyObject *__pyx_n_s_default; - PyObject *__pyx_n_s_dict; - PyObject *__pyx_kp_u_disable; - PyObject *__pyx_n_s_document; - PyObject *__pyx_n_s_dtype_is_object; - PyObject *__pyx_kp_u_enable; - PyObject *__pyx_n_s_encode; - PyObject *__pyx_n_s_enumerate; - PyObject *__pyx_n_s_error; - PyObject *__pyx_n_s_flags; - PyObject *__pyx_n_s_format; - PyObject *__pyx_n_s_fortran; - PyObject *__pyx_n_u_fortran; - PyObject *__pyx_kp_u_gc; - PyObject *__pyx_n_s_get; - PyObject *__pyx_n_s_get_implementations; - PyObject *__pyx_n_s_getstate; - PyObject *__pyx_kp_u_got; - PyObject *__pyx_kp_u_got_differing_extents_in_dimensi; - PyObject *__pyx_n_u_i; - PyObject *__pyx_n_s_id; - PyObject *__pyx_n_s_impl; - PyObject *__pyx_n_s_import; - PyObject *__pyx_n_s_index; - PyObject *__pyx_n_s_initializing; - PyObject *__pyx_n_s_is_coroutine; - PyObject *__pyx_kp_u_isenabled; - PyObject *__pyx_n_s_it; - PyObject *__pyx_n_s_items; - PyObject *__pyx_n_s_itemsize; - PyObject *__pyx_kp_s_itemsize_0_for_cython_array; - PyObject *__pyx_n_s_iter; - PyObject *__pyx_n_s_json_pointer; - PyObject *__pyx_n_s_key; - PyObject *__pyx_n_s_keys; - PyObject *__pyx_n_s_load; - PyObject *__pyx_n_s_main; - PyObject *__pyx_n_s_max_capacity; - PyObject *__pyx_n_s_memview; - PyObject *__pyx_n_s_mode; - PyObject *__pyx_n_s_name; - PyObject *__pyx_n_s_name_2; - PyObject *__pyx_n_s_ndim; - PyObject *__pyx_n_s_new; - PyObject *__pyx_kp_s_no_default___reduce___due_to_non; - PyObject *__pyx_n_s_obj; - PyObject *__pyx_n_s_of_type; - PyObject *__pyx_kp_u_of_type_must_be_one_of_d_i_u; - PyObject *__pyx_n_s_pack; - PyObject *__pyx_n_s_parse; - PyObject *__pyx_n_s_path; - PyObject *__pyx_n_s_pathlib; - PyObject *__pyx_n_s_pickle; - PyObject *__pyx_n_s_pyx_PickleError; - PyObject *__pyx_n_s_pyx_checksum; - PyObject *__pyx_n_s_pyx_result; - PyObject *__pyx_n_s_pyx_state; - PyObject *__pyx_n_s_pyx_type; - PyObject *__pyx_n_s_pyx_unpickle_Enum; - PyObject *__pyx_n_s_pyx_vtable; - PyObject *__pyx_n_s_range; - PyObject *__pyx_n_s_recursive; - PyObject *__pyx_n_s_reduce; - PyObject *__pyx_n_s_reduce_cython; - PyObject *__pyx_n_s_reduce_ex; - PyObject *__pyx_n_s_register; - PyObject *__pyx_n_s_self; - PyObject *__pyx_kp_s_self_c_element_self_c_parser_can; - PyObject *__pyx_n_s_send; - PyObject *__pyx_n_s_setstate; - PyObject *__pyx_n_s_setstate_cython; - PyObject *__pyx_n_s_shape; - PyObject *__pyx_kp_s_simdjson_csimdjson_pyx; - PyObject *__pyx_n_s_size; - PyObject *__pyx_n_s_spec; - PyObject *__pyx_n_s_src; - PyObject *__pyx_n_s_start; - PyObject *__pyx_n_s_step; - PyObject *__pyx_n_s_stop; - PyObject *__pyx_n_s_str_data; - PyObject *__pyx_n_s_str_size; - PyObject *__pyx_kp_s_strided_and_direct; - PyObject *__pyx_kp_s_strided_and_direct_or_indirect; - PyObject *__pyx_kp_s_strided_and_indirect; - PyObject *__pyx_kp_s_stringsource; - PyObject *__pyx_n_s_struct; - PyObject *__pyx_n_s_supported_by_runtime; - PyObject *__pyx_n_s_sys; - PyObject *__pyx_n_s_test; - PyObject *__pyx_n_s_throw; - PyObject *__pyx_n_u_u; - PyObject *__pyx_kp_s_unable_to_allocate_array_data; - PyObject *__pyx_kp_s_unable_to_allocate_shape_and_str; - PyObject *__pyx_n_s_unpack; - PyObject *__pyx_n_s_update; - PyObject *__pyx_kp_u_utf_8; - PyObject *__pyx_n_s_values; - PyObject *__pyx_n_s_version_info; - PyObject *__pyx_int_0; - PyObject *__pyx_int_1; - PyObject *__pyx_int_3; - PyObject *__pyx_int_112105877; - PyObject *__pyx_int_136983863; - PyObject *__pyx_int_184977713; - PyObject *__pyx_int_neg_1; - size_t __pyx_k__13; - PyObject *__pyx_slice__5; - PyObject *__pyx_tuple__4; - PyObject *__pyx_tuple__8; - PyObject *__pyx_tuple__9; - PyObject *__pyx_tuple__10; - PyObject *__pyx_tuple__14; - PyObject *__pyx_tuple__15; - PyObject *__pyx_tuple__17; - PyObject *__pyx_tuple__18; - PyObject *__pyx_tuple__19; - PyObject *__pyx_tuple__20; - PyObject *__pyx_tuple__21; - PyObject *__pyx_tuple__22; - PyObject *__pyx_tuple__23; - PyObject *__pyx_tuple__24; - PyObject *__pyx_tuple__25; - PyObject *__pyx_tuple__26; - PyObject *__pyx_tuple__27; - PyObject *__pyx_tuple__28; - PyObject *__pyx_tuple__30; - PyObject *__pyx_tuple__32; - PyObject *__pyx_tuple__34; - PyObject *__pyx_tuple__37; - PyObject *__pyx_tuple__41; - PyObject *__pyx_tuple__43; - PyObject *__pyx_tuple__44; - PyObject *__pyx_tuple__45; - PyObject *__pyx_tuple__50; - PyObject *__pyx_tuple__52; - PyObject *__pyx_tuple__53; - PyObject *__pyx_tuple__55; - PyObject *__pyx_tuple__56; - PyObject *__pyx_codeobj__11; - PyObject *__pyx_codeobj__12; - PyObject *__pyx_codeobj__16; - PyObject *__pyx_codeobj__29; - PyObject *__pyx_codeobj__31; - PyObject *__pyx_codeobj__33; - PyObject *__pyx_codeobj__35; - PyObject *__pyx_codeobj__36; - PyObject *__pyx_codeobj__38; - PyObject *__pyx_codeobj__39; - PyObject *__pyx_codeobj__40; - PyObject *__pyx_codeobj__42; - PyObject *__pyx_codeobj__46; - PyObject *__pyx_codeobj__47; - PyObject *__pyx_codeobj__48; - PyObject *__pyx_codeobj__49; - PyObject *__pyx_codeobj__51; - PyObject *__pyx_codeobj__54; - PyObject *__pyx_codeobj__57; - PyObject *__pyx_codeobj__58; -} __pyx_mstate; - -#if CYTHON_USE_MODULE_STATE -#ifdef __cplusplus -namespace { - extern struct PyModuleDef __pyx_moduledef; -} /* anonymous namespace */ -#else -static struct PyModuleDef __pyx_moduledef; -#endif - -#define __pyx_mstate(o) ((__pyx_mstate *)__Pyx_PyModule_GetState(o)) - -#define __pyx_mstate_global (__pyx_mstate(PyState_FindModule(&__pyx_moduledef))) - -#define __pyx_m (PyState_FindModule(&__pyx_moduledef)) -#else -static __pyx_mstate __pyx_mstate_global_static = -#ifdef __cplusplus - {}; -#else - {0}; -#endif -static __pyx_mstate *__pyx_mstate_global = &__pyx_mstate_global_static; -#endif -/* #### Code section: module_state_clear ### */ -#if CYTHON_USE_MODULE_STATE -static int __pyx_m_clear(PyObject *m) { - __pyx_mstate *clear_module_state = __pyx_mstate(m); - if (!clear_module_state) return 0; - Py_CLEAR(clear_module_state->__pyx_d); - Py_CLEAR(clear_module_state->__pyx_b); - Py_CLEAR(clear_module_state->__pyx_cython_runtime); - Py_CLEAR(clear_module_state->__pyx_empty_tuple); - Py_CLEAR(clear_module_state->__pyx_empty_bytes); - Py_CLEAR(clear_module_state->__pyx_empty_unicode); - #ifdef __Pyx_CyFunction_USED - Py_CLEAR(clear_module_state->__pyx_CyFunctionType); - #endif - #ifdef __Pyx_FusedFunction_USED - Py_CLEAR(clear_module_state->__pyx_FusedFunctionType); - #endif - Py_CLEAR(clear_module_state->__pyx_ptype_7cpython_4type_type); - Py_CLEAR(clear_module_state->__pyx_ptype_9csimdjson_ArrayBuffer); - Py_CLEAR(clear_module_state->__pyx_type_9csimdjson_ArrayBuffer); - Py_CLEAR(clear_module_state->__pyx_ptype_9csimdjson_Array); - Py_CLEAR(clear_module_state->__pyx_type_9csimdjson_Array); - Py_CLEAR(clear_module_state->__pyx_ptype_9csimdjson_Object); - Py_CLEAR(clear_module_state->__pyx_type_9csimdjson_Object); - Py_CLEAR(clear_module_state->__pyx_ptype_9csimdjson_Parser); - Py_CLEAR(clear_module_state->__pyx_type_9csimdjson_Parser); - Py_CLEAR(clear_module_state->__pyx_ptype_9csimdjson___pyx_scope_struct____iter__); - Py_CLEAR(clear_module_state->__pyx_type_9csimdjson___pyx_scope_struct____iter__); - Py_CLEAR(clear_module_state->__pyx_ptype_9csimdjson___pyx_scope_struct_1___iter__); - Py_CLEAR(clear_module_state->__pyx_type_9csimdjson___pyx_scope_struct_1___iter__); - Py_CLEAR(clear_module_state->__pyx_ptype_9csimdjson___pyx_scope_struct_2_values); - Py_CLEAR(clear_module_state->__pyx_type_9csimdjson___pyx_scope_struct_2_values); - Py_CLEAR(clear_module_state->__pyx_ptype_9csimdjson___pyx_scope_struct_3_items); - Py_CLEAR(clear_module_state->__pyx_type_9csimdjson___pyx_scope_struct_3_items); - Py_CLEAR(clear_module_state->__pyx_ptype_9csimdjson___pyx_scope_struct_4_get_implementations); - Py_CLEAR(clear_module_state->__pyx_type_9csimdjson___pyx_scope_struct_4_get_implementations); - Py_CLEAR(clear_module_state->__pyx_array_type); - Py_CLEAR(clear_module_state->__pyx_type___pyx_array); - Py_CLEAR(clear_module_state->__pyx_MemviewEnum_type); - Py_CLEAR(clear_module_state->__pyx_type___pyx_MemviewEnum); - Py_CLEAR(clear_module_state->__pyx_memoryview_type); - Py_CLEAR(clear_module_state->__pyx_type___pyx_memoryview); - Py_CLEAR(clear_module_state->__pyx_memoryviewslice_type); - Py_CLEAR(clear_module_state->__pyx_type___pyx_memoryviewslice); - Py_CLEAR(clear_module_state->__pyx_kp_u_); - Py_CLEAR(clear_module_state->__pyx_n_s_ASCII); - Py_CLEAR(clear_module_state->__pyx_kp_s_All_dimensions_preceding_dimensi); - Py_CLEAR(clear_module_state->__pyx_n_s_Array); - Py_CLEAR(clear_module_state->__pyx_n_s_ArrayBuffer); - Py_CLEAR(clear_module_state->__pyx_n_s_ArrayBuffer___reduce_cython); - Py_CLEAR(clear_module_state->__pyx_n_s_ArrayBuffer___setstate_cython); - Py_CLEAR(clear_module_state->__pyx_n_s_Array___iter); - Py_CLEAR(clear_module_state->__pyx_n_s_Array___reduce_cython); - Py_CLEAR(clear_module_state->__pyx_n_s_Array___setstate_cython); - Py_CLEAR(clear_module_state->__pyx_n_s_Array_as_buffer); - Py_CLEAR(clear_module_state->__pyx_n_s_Array_as_list); - Py_CLEAR(clear_module_state->__pyx_n_s_Array_at_pointer); - Py_CLEAR(clear_module_state->__pyx_n_s_AssertionError); - Py_CLEAR(clear_module_state->__pyx_kp_u_Attempted_to_set_a_runtime_Imple); - Py_CLEAR(clear_module_state->__pyx_kp_s_Buffer_view_does_not_expose_stri); - Py_CLEAR(clear_module_state->__pyx_kp_s_Can_only_create_a_buffer_that_is); - Py_CLEAR(clear_module_state->__pyx_kp_s_Cannot_assign_to_read_only_memor); - Py_CLEAR(clear_module_state->__pyx_kp_s_Cannot_create_writable_memory_vi); - Py_CLEAR(clear_module_state->__pyx_kp_u_Cannot_index_with_type); - Py_CLEAR(clear_module_state->__pyx_kp_s_Cannot_transpose_memoryview_with); - Py_CLEAR(clear_module_state->__pyx_kp_s_Dimension_d_is_not_direct); - Py_CLEAR(clear_module_state->__pyx_kp_u_EMPTY_no_JSON_found); - Py_CLEAR(clear_module_state->__pyx_n_s_Ellipsis); - Py_CLEAR(clear_module_state->__pyx_kp_s_Empty_shape_tuple_for_cython_arr); - Py_CLEAR(clear_module_state->__pyx_kp_u_Encountered_an_unknown_element_t); - Py_CLEAR(clear_module_state->__pyx_kp_s_Incompatible_checksums_0x_x_vs_0); - Py_CLEAR(clear_module_state->__pyx_n_s_IndexError); - Py_CLEAR(clear_module_state->__pyx_kp_s_Index_out_of_bounds_axis_d); - Py_CLEAR(clear_module_state->__pyx_kp_s_Indirect_dimensions_not_supporte); - Py_CLEAR(clear_module_state->__pyx_kp_u_Invalid_mode_expected_c_or_fortr); - Py_CLEAR(clear_module_state->__pyx_kp_u_Invalid_shape_in_axis); - Py_CLEAR(clear_module_state->__pyx_n_s_KeyError); - Py_CLEAR(clear_module_state->__pyx_n_s_MAXSIZE_BYTES); - Py_CLEAR(clear_module_state->__pyx_n_s_MemoryError); - Py_CLEAR(clear_module_state->__pyx_kp_s_MemoryView_of_r_at_0x_x); - Py_CLEAR(clear_module_state->__pyx_kp_s_MemoryView_of_r_object); - Py_CLEAR(clear_module_state->__pyx_n_b_O); - Py_CLEAR(clear_module_state->__pyx_n_s_Object); - Py_CLEAR(clear_module_state->__pyx_n_s_Object___iter); - Py_CLEAR(clear_module_state->__pyx_n_s_Object___reduce_cython); - Py_CLEAR(clear_module_state->__pyx_n_s_Object___setstate_cython); - Py_CLEAR(clear_module_state->__pyx_n_s_Object_as_dict); - Py_CLEAR(clear_module_state->__pyx_n_s_Object_at_pointer); - Py_CLEAR(clear_module_state->__pyx_n_s_Object_get); - Py_CLEAR(clear_module_state->__pyx_n_s_Object_items); - Py_CLEAR(clear_module_state->__pyx_n_s_Object_values); - Py_CLEAR(clear_module_state->__pyx_kp_u_Out_of_bounds_on_buffer_access_a); - Py_CLEAR(clear_module_state->__pyx_n_s_PADDING); - Py_CLEAR(clear_module_state->__pyx_n_s_Parser); - Py_CLEAR(clear_module_state->__pyx_n_s_Parser___reduce_cython); - Py_CLEAR(clear_module_state->__pyx_n_s_Parser___setstate_cython); - Py_CLEAR(clear_module_state->__pyx_n_s_Parser_get_implementations); - Py_CLEAR(clear_module_state->__pyx_n_s_Parser_load); - Py_CLEAR(clear_module_state->__pyx_n_s_Parser_parse); - Py_CLEAR(clear_module_state->__pyx_n_s_Path); - Py_CLEAR(clear_module_state->__pyx_n_s_PickleError); - Py_CLEAR(clear_module_state->__pyx_n_s_RuntimeError); - Py_CLEAR(clear_module_state->__pyx_n_s_Sequence); - Py_CLEAR(clear_module_state->__pyx_kp_s_Step_may_not_be_zero_axis_d); - Py_CLEAR(clear_module_state->__pyx_kp_u_Tried_to_re_use_a_parser_while_s); - Py_CLEAR(clear_module_state->__pyx_n_s_TypeError); - Py_CLEAR(clear_module_state->__pyx_kp_s_Unable_to_convert_item_to_object); - Py_CLEAR(clear_module_state->__pyx_kp_u_Unknown_Implementation); - Py_CLEAR(clear_module_state->__pyx_n_s_VERSION); - Py_CLEAR(clear_module_state->__pyx_n_s_ValueError); - Py_CLEAR(clear_module_state->__pyx_n_s_View_MemoryView); - Py_CLEAR(clear_module_state->__pyx_kp_u__2); - Py_CLEAR(clear_module_state->__pyx_n_s__3); - Py_CLEAR(clear_module_state->__pyx_n_s__59); - Py_CLEAR(clear_module_state->__pyx_kp_u__6); - Py_CLEAR(clear_module_state->__pyx_kp_u__7); - Py_CLEAR(clear_module_state->__pyx_n_s_abc); - Py_CLEAR(clear_module_state->__pyx_n_s_allocate_buffer); - Py_CLEAR(clear_module_state->__pyx_kp_u_and); - Py_CLEAR(clear_module_state->__pyx_n_s_args); - Py_CLEAR(clear_module_state->__pyx_n_s_as_buffer); - Py_CLEAR(clear_module_state->__pyx_n_s_as_dict); - Py_CLEAR(clear_module_state->__pyx_n_s_as_list); - Py_CLEAR(clear_module_state->__pyx_n_s_asyncio_coroutines); - Py_CLEAR(clear_module_state->__pyx_n_s_at_pointer); - Py_CLEAR(clear_module_state->__pyx_n_s_base); - Py_CLEAR(clear_module_state->__pyx_n_s_bytes_data); - Py_CLEAR(clear_module_state->__pyx_n_s_c); - Py_CLEAR(clear_module_state->__pyx_n_u_c); - Py_CLEAR(clear_module_state->__pyx_n_s_class); - Py_CLEAR(clear_module_state->__pyx_n_s_class_getitem); - Py_CLEAR(clear_module_state->__pyx_n_s_cline_in_traceback); - Py_CLEAR(clear_module_state->__pyx_n_s_close); - Py_CLEAR(clear_module_state->__pyx_n_s_collections); - Py_CLEAR(clear_module_state->__pyx_kp_s_collections_abc); - Py_CLEAR(clear_module_state->__pyx_kp_s_contiguous_and_direct); - Py_CLEAR(clear_module_state->__pyx_kp_s_contiguous_and_indirect); - Py_CLEAR(clear_module_state->__pyx_n_s_count); - Py_CLEAR(clear_module_state->__pyx_n_s_csimdjson); - Py_CLEAR(clear_module_state->__pyx_n_u_d); - Py_CLEAR(clear_module_state->__pyx_n_s_data); - Py_CLEAR(clear_module_state->__pyx_n_s_default); - Py_CLEAR(clear_module_state->__pyx_n_s_dict); - Py_CLEAR(clear_module_state->__pyx_kp_u_disable); - Py_CLEAR(clear_module_state->__pyx_n_s_document); - Py_CLEAR(clear_module_state->__pyx_n_s_dtype_is_object); - Py_CLEAR(clear_module_state->__pyx_kp_u_enable); - Py_CLEAR(clear_module_state->__pyx_n_s_encode); - Py_CLEAR(clear_module_state->__pyx_n_s_enumerate); - Py_CLEAR(clear_module_state->__pyx_n_s_error); - Py_CLEAR(clear_module_state->__pyx_n_s_flags); - Py_CLEAR(clear_module_state->__pyx_n_s_format); - Py_CLEAR(clear_module_state->__pyx_n_s_fortran); - Py_CLEAR(clear_module_state->__pyx_n_u_fortran); - Py_CLEAR(clear_module_state->__pyx_kp_u_gc); - Py_CLEAR(clear_module_state->__pyx_n_s_get); - Py_CLEAR(clear_module_state->__pyx_n_s_get_implementations); - Py_CLEAR(clear_module_state->__pyx_n_s_getstate); - Py_CLEAR(clear_module_state->__pyx_kp_u_got); - Py_CLEAR(clear_module_state->__pyx_kp_u_got_differing_extents_in_dimensi); - Py_CLEAR(clear_module_state->__pyx_n_u_i); - Py_CLEAR(clear_module_state->__pyx_n_s_id); - Py_CLEAR(clear_module_state->__pyx_n_s_impl); - Py_CLEAR(clear_module_state->__pyx_n_s_import); - Py_CLEAR(clear_module_state->__pyx_n_s_index); - Py_CLEAR(clear_module_state->__pyx_n_s_initializing); - Py_CLEAR(clear_module_state->__pyx_n_s_is_coroutine); - Py_CLEAR(clear_module_state->__pyx_kp_u_isenabled); - Py_CLEAR(clear_module_state->__pyx_n_s_it); - Py_CLEAR(clear_module_state->__pyx_n_s_items); - Py_CLEAR(clear_module_state->__pyx_n_s_itemsize); - Py_CLEAR(clear_module_state->__pyx_kp_s_itemsize_0_for_cython_array); - Py_CLEAR(clear_module_state->__pyx_n_s_iter); - Py_CLEAR(clear_module_state->__pyx_n_s_json_pointer); - Py_CLEAR(clear_module_state->__pyx_n_s_key); - Py_CLEAR(clear_module_state->__pyx_n_s_keys); - Py_CLEAR(clear_module_state->__pyx_n_s_load); - Py_CLEAR(clear_module_state->__pyx_n_s_main); - Py_CLEAR(clear_module_state->__pyx_n_s_max_capacity); - Py_CLEAR(clear_module_state->__pyx_n_s_memview); - Py_CLEAR(clear_module_state->__pyx_n_s_mode); - Py_CLEAR(clear_module_state->__pyx_n_s_name); - Py_CLEAR(clear_module_state->__pyx_n_s_name_2); - Py_CLEAR(clear_module_state->__pyx_n_s_ndim); - Py_CLEAR(clear_module_state->__pyx_n_s_new); - Py_CLEAR(clear_module_state->__pyx_kp_s_no_default___reduce___due_to_non); - Py_CLEAR(clear_module_state->__pyx_n_s_obj); - Py_CLEAR(clear_module_state->__pyx_n_s_of_type); - Py_CLEAR(clear_module_state->__pyx_kp_u_of_type_must_be_one_of_d_i_u); - Py_CLEAR(clear_module_state->__pyx_n_s_pack); - Py_CLEAR(clear_module_state->__pyx_n_s_parse); - Py_CLEAR(clear_module_state->__pyx_n_s_path); - Py_CLEAR(clear_module_state->__pyx_n_s_pathlib); - Py_CLEAR(clear_module_state->__pyx_n_s_pickle); - Py_CLEAR(clear_module_state->__pyx_n_s_pyx_PickleError); - Py_CLEAR(clear_module_state->__pyx_n_s_pyx_checksum); - Py_CLEAR(clear_module_state->__pyx_n_s_pyx_result); - Py_CLEAR(clear_module_state->__pyx_n_s_pyx_state); - Py_CLEAR(clear_module_state->__pyx_n_s_pyx_type); - Py_CLEAR(clear_module_state->__pyx_n_s_pyx_unpickle_Enum); - Py_CLEAR(clear_module_state->__pyx_n_s_pyx_vtable); - Py_CLEAR(clear_module_state->__pyx_n_s_range); - Py_CLEAR(clear_module_state->__pyx_n_s_recursive); - Py_CLEAR(clear_module_state->__pyx_n_s_reduce); - Py_CLEAR(clear_module_state->__pyx_n_s_reduce_cython); - Py_CLEAR(clear_module_state->__pyx_n_s_reduce_ex); - Py_CLEAR(clear_module_state->__pyx_n_s_register); - Py_CLEAR(clear_module_state->__pyx_n_s_self); - Py_CLEAR(clear_module_state->__pyx_kp_s_self_c_element_self_c_parser_can); - Py_CLEAR(clear_module_state->__pyx_n_s_send); - Py_CLEAR(clear_module_state->__pyx_n_s_setstate); - Py_CLEAR(clear_module_state->__pyx_n_s_setstate_cython); - Py_CLEAR(clear_module_state->__pyx_n_s_shape); - Py_CLEAR(clear_module_state->__pyx_kp_s_simdjson_csimdjson_pyx); - Py_CLEAR(clear_module_state->__pyx_n_s_size); - Py_CLEAR(clear_module_state->__pyx_n_s_spec); - Py_CLEAR(clear_module_state->__pyx_n_s_src); - Py_CLEAR(clear_module_state->__pyx_n_s_start); - Py_CLEAR(clear_module_state->__pyx_n_s_step); - Py_CLEAR(clear_module_state->__pyx_n_s_stop); - Py_CLEAR(clear_module_state->__pyx_n_s_str_data); - Py_CLEAR(clear_module_state->__pyx_n_s_str_size); - Py_CLEAR(clear_module_state->__pyx_kp_s_strided_and_direct); - Py_CLEAR(clear_module_state->__pyx_kp_s_strided_and_direct_or_indirect); - Py_CLEAR(clear_module_state->__pyx_kp_s_strided_and_indirect); - Py_CLEAR(clear_module_state->__pyx_kp_s_stringsource); - Py_CLEAR(clear_module_state->__pyx_n_s_struct); - Py_CLEAR(clear_module_state->__pyx_n_s_supported_by_runtime); - Py_CLEAR(clear_module_state->__pyx_n_s_sys); - Py_CLEAR(clear_module_state->__pyx_n_s_test); - Py_CLEAR(clear_module_state->__pyx_n_s_throw); - Py_CLEAR(clear_module_state->__pyx_n_u_u); - Py_CLEAR(clear_module_state->__pyx_kp_s_unable_to_allocate_array_data); - Py_CLEAR(clear_module_state->__pyx_kp_s_unable_to_allocate_shape_and_str); - Py_CLEAR(clear_module_state->__pyx_n_s_unpack); - Py_CLEAR(clear_module_state->__pyx_n_s_update); - Py_CLEAR(clear_module_state->__pyx_kp_u_utf_8); - Py_CLEAR(clear_module_state->__pyx_n_s_values); - Py_CLEAR(clear_module_state->__pyx_n_s_version_info); - Py_CLEAR(clear_module_state->__pyx_int_0); - Py_CLEAR(clear_module_state->__pyx_int_1); - Py_CLEAR(clear_module_state->__pyx_int_3); - Py_CLEAR(clear_module_state->__pyx_int_112105877); - Py_CLEAR(clear_module_state->__pyx_int_136983863); - Py_CLEAR(clear_module_state->__pyx_int_184977713); - Py_CLEAR(clear_module_state->__pyx_int_neg_1); - Py_CLEAR(clear_module_state->__pyx_slice__5); - Py_CLEAR(clear_module_state->__pyx_tuple__4); - Py_CLEAR(clear_module_state->__pyx_tuple__8); - Py_CLEAR(clear_module_state->__pyx_tuple__9); - Py_CLEAR(clear_module_state->__pyx_tuple__10); - Py_CLEAR(clear_module_state->__pyx_tuple__14); - Py_CLEAR(clear_module_state->__pyx_tuple__15); - Py_CLEAR(clear_module_state->__pyx_tuple__17); - Py_CLEAR(clear_module_state->__pyx_tuple__18); - Py_CLEAR(clear_module_state->__pyx_tuple__19); - Py_CLEAR(clear_module_state->__pyx_tuple__20); - Py_CLEAR(clear_module_state->__pyx_tuple__21); - Py_CLEAR(clear_module_state->__pyx_tuple__22); - Py_CLEAR(clear_module_state->__pyx_tuple__23); - Py_CLEAR(clear_module_state->__pyx_tuple__24); - Py_CLEAR(clear_module_state->__pyx_tuple__25); - Py_CLEAR(clear_module_state->__pyx_tuple__26); - Py_CLEAR(clear_module_state->__pyx_tuple__27); - Py_CLEAR(clear_module_state->__pyx_tuple__28); - Py_CLEAR(clear_module_state->__pyx_tuple__30); - Py_CLEAR(clear_module_state->__pyx_tuple__32); - Py_CLEAR(clear_module_state->__pyx_tuple__34); - Py_CLEAR(clear_module_state->__pyx_tuple__37); - Py_CLEAR(clear_module_state->__pyx_tuple__41); - Py_CLEAR(clear_module_state->__pyx_tuple__43); - Py_CLEAR(clear_module_state->__pyx_tuple__44); - Py_CLEAR(clear_module_state->__pyx_tuple__45); - Py_CLEAR(clear_module_state->__pyx_tuple__50); - Py_CLEAR(clear_module_state->__pyx_tuple__52); - Py_CLEAR(clear_module_state->__pyx_tuple__53); - Py_CLEAR(clear_module_state->__pyx_tuple__55); - Py_CLEAR(clear_module_state->__pyx_tuple__56); - Py_CLEAR(clear_module_state->__pyx_codeobj__11); - Py_CLEAR(clear_module_state->__pyx_codeobj__12); - Py_CLEAR(clear_module_state->__pyx_codeobj__16); - Py_CLEAR(clear_module_state->__pyx_codeobj__29); - Py_CLEAR(clear_module_state->__pyx_codeobj__31); - Py_CLEAR(clear_module_state->__pyx_codeobj__33); - Py_CLEAR(clear_module_state->__pyx_codeobj__35); - Py_CLEAR(clear_module_state->__pyx_codeobj__36); - Py_CLEAR(clear_module_state->__pyx_codeobj__38); - Py_CLEAR(clear_module_state->__pyx_codeobj__39); - Py_CLEAR(clear_module_state->__pyx_codeobj__40); - Py_CLEAR(clear_module_state->__pyx_codeobj__42); - Py_CLEAR(clear_module_state->__pyx_codeobj__46); - Py_CLEAR(clear_module_state->__pyx_codeobj__47); - Py_CLEAR(clear_module_state->__pyx_codeobj__48); - Py_CLEAR(clear_module_state->__pyx_codeobj__49); - Py_CLEAR(clear_module_state->__pyx_codeobj__51); - Py_CLEAR(clear_module_state->__pyx_codeobj__54); - Py_CLEAR(clear_module_state->__pyx_codeobj__57); - Py_CLEAR(clear_module_state->__pyx_codeobj__58); - return 0; -} -#endif -/* #### Code section: module_state_traverse ### */ -#if CYTHON_USE_MODULE_STATE -static int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) { - __pyx_mstate *traverse_module_state = __pyx_mstate(m); - if (!traverse_module_state) return 0; - Py_VISIT(traverse_module_state->__pyx_d); - Py_VISIT(traverse_module_state->__pyx_b); - Py_VISIT(traverse_module_state->__pyx_cython_runtime); - Py_VISIT(traverse_module_state->__pyx_empty_tuple); - Py_VISIT(traverse_module_state->__pyx_empty_bytes); - Py_VISIT(traverse_module_state->__pyx_empty_unicode); - #ifdef __Pyx_CyFunction_USED - Py_VISIT(traverse_module_state->__pyx_CyFunctionType); - #endif - #ifdef __Pyx_FusedFunction_USED - Py_VISIT(traverse_module_state->__pyx_FusedFunctionType); - #endif - Py_VISIT(traverse_module_state->__pyx_ptype_7cpython_4type_type); - Py_VISIT(traverse_module_state->__pyx_ptype_9csimdjson_ArrayBuffer); - Py_VISIT(traverse_module_state->__pyx_type_9csimdjson_ArrayBuffer); - Py_VISIT(traverse_module_state->__pyx_ptype_9csimdjson_Array); - Py_VISIT(traverse_module_state->__pyx_type_9csimdjson_Array); - Py_VISIT(traverse_module_state->__pyx_ptype_9csimdjson_Object); - Py_VISIT(traverse_module_state->__pyx_type_9csimdjson_Object); - Py_VISIT(traverse_module_state->__pyx_ptype_9csimdjson_Parser); - Py_VISIT(traverse_module_state->__pyx_type_9csimdjson_Parser); - Py_VISIT(traverse_module_state->__pyx_ptype_9csimdjson___pyx_scope_struct____iter__); - Py_VISIT(traverse_module_state->__pyx_type_9csimdjson___pyx_scope_struct____iter__); - Py_VISIT(traverse_module_state->__pyx_ptype_9csimdjson___pyx_scope_struct_1___iter__); - Py_VISIT(traverse_module_state->__pyx_type_9csimdjson___pyx_scope_struct_1___iter__); - Py_VISIT(traverse_module_state->__pyx_ptype_9csimdjson___pyx_scope_struct_2_values); - Py_VISIT(traverse_module_state->__pyx_type_9csimdjson___pyx_scope_struct_2_values); - Py_VISIT(traverse_module_state->__pyx_ptype_9csimdjson___pyx_scope_struct_3_items); - Py_VISIT(traverse_module_state->__pyx_type_9csimdjson___pyx_scope_struct_3_items); - Py_VISIT(traverse_module_state->__pyx_ptype_9csimdjson___pyx_scope_struct_4_get_implementations); - Py_VISIT(traverse_module_state->__pyx_type_9csimdjson___pyx_scope_struct_4_get_implementations); - Py_VISIT(traverse_module_state->__pyx_array_type); - Py_VISIT(traverse_module_state->__pyx_type___pyx_array); - Py_VISIT(traverse_module_state->__pyx_MemviewEnum_type); - Py_VISIT(traverse_module_state->__pyx_type___pyx_MemviewEnum); - Py_VISIT(traverse_module_state->__pyx_memoryview_type); - Py_VISIT(traverse_module_state->__pyx_type___pyx_memoryview); - Py_VISIT(traverse_module_state->__pyx_memoryviewslice_type); - Py_VISIT(traverse_module_state->__pyx_type___pyx_memoryviewslice); - Py_VISIT(traverse_module_state->__pyx_kp_u_); - Py_VISIT(traverse_module_state->__pyx_n_s_ASCII); - Py_VISIT(traverse_module_state->__pyx_kp_s_All_dimensions_preceding_dimensi); - Py_VISIT(traverse_module_state->__pyx_n_s_Array); - Py_VISIT(traverse_module_state->__pyx_n_s_ArrayBuffer); - Py_VISIT(traverse_module_state->__pyx_n_s_ArrayBuffer___reduce_cython); - Py_VISIT(traverse_module_state->__pyx_n_s_ArrayBuffer___setstate_cython); - Py_VISIT(traverse_module_state->__pyx_n_s_Array___iter); - Py_VISIT(traverse_module_state->__pyx_n_s_Array___reduce_cython); - Py_VISIT(traverse_module_state->__pyx_n_s_Array___setstate_cython); - Py_VISIT(traverse_module_state->__pyx_n_s_Array_as_buffer); - Py_VISIT(traverse_module_state->__pyx_n_s_Array_as_list); - Py_VISIT(traverse_module_state->__pyx_n_s_Array_at_pointer); - Py_VISIT(traverse_module_state->__pyx_n_s_AssertionError); - Py_VISIT(traverse_module_state->__pyx_kp_u_Attempted_to_set_a_runtime_Imple); - Py_VISIT(traverse_module_state->__pyx_kp_s_Buffer_view_does_not_expose_stri); - Py_VISIT(traverse_module_state->__pyx_kp_s_Can_only_create_a_buffer_that_is); - Py_VISIT(traverse_module_state->__pyx_kp_s_Cannot_assign_to_read_only_memor); - Py_VISIT(traverse_module_state->__pyx_kp_s_Cannot_create_writable_memory_vi); - Py_VISIT(traverse_module_state->__pyx_kp_u_Cannot_index_with_type); - Py_VISIT(traverse_module_state->__pyx_kp_s_Cannot_transpose_memoryview_with); - Py_VISIT(traverse_module_state->__pyx_kp_s_Dimension_d_is_not_direct); - Py_VISIT(traverse_module_state->__pyx_kp_u_EMPTY_no_JSON_found); - Py_VISIT(traverse_module_state->__pyx_n_s_Ellipsis); - Py_VISIT(traverse_module_state->__pyx_kp_s_Empty_shape_tuple_for_cython_arr); - Py_VISIT(traverse_module_state->__pyx_kp_u_Encountered_an_unknown_element_t); - Py_VISIT(traverse_module_state->__pyx_kp_s_Incompatible_checksums_0x_x_vs_0); - Py_VISIT(traverse_module_state->__pyx_n_s_IndexError); - Py_VISIT(traverse_module_state->__pyx_kp_s_Index_out_of_bounds_axis_d); - Py_VISIT(traverse_module_state->__pyx_kp_s_Indirect_dimensions_not_supporte); - Py_VISIT(traverse_module_state->__pyx_kp_u_Invalid_mode_expected_c_or_fortr); - Py_VISIT(traverse_module_state->__pyx_kp_u_Invalid_shape_in_axis); - Py_VISIT(traverse_module_state->__pyx_n_s_KeyError); - Py_VISIT(traverse_module_state->__pyx_n_s_MAXSIZE_BYTES); - Py_VISIT(traverse_module_state->__pyx_n_s_MemoryError); - Py_VISIT(traverse_module_state->__pyx_kp_s_MemoryView_of_r_at_0x_x); - Py_VISIT(traverse_module_state->__pyx_kp_s_MemoryView_of_r_object); - Py_VISIT(traverse_module_state->__pyx_n_b_O); - Py_VISIT(traverse_module_state->__pyx_n_s_Object); - Py_VISIT(traverse_module_state->__pyx_n_s_Object___iter); - Py_VISIT(traverse_module_state->__pyx_n_s_Object___reduce_cython); - Py_VISIT(traverse_module_state->__pyx_n_s_Object___setstate_cython); - Py_VISIT(traverse_module_state->__pyx_n_s_Object_as_dict); - Py_VISIT(traverse_module_state->__pyx_n_s_Object_at_pointer); - Py_VISIT(traverse_module_state->__pyx_n_s_Object_get); - Py_VISIT(traverse_module_state->__pyx_n_s_Object_items); - Py_VISIT(traverse_module_state->__pyx_n_s_Object_values); - Py_VISIT(traverse_module_state->__pyx_kp_u_Out_of_bounds_on_buffer_access_a); - Py_VISIT(traverse_module_state->__pyx_n_s_PADDING); - Py_VISIT(traverse_module_state->__pyx_n_s_Parser); - Py_VISIT(traverse_module_state->__pyx_n_s_Parser___reduce_cython); - Py_VISIT(traverse_module_state->__pyx_n_s_Parser___setstate_cython); - Py_VISIT(traverse_module_state->__pyx_n_s_Parser_get_implementations); - Py_VISIT(traverse_module_state->__pyx_n_s_Parser_load); - Py_VISIT(traverse_module_state->__pyx_n_s_Parser_parse); - Py_VISIT(traverse_module_state->__pyx_n_s_Path); - Py_VISIT(traverse_module_state->__pyx_n_s_PickleError); - Py_VISIT(traverse_module_state->__pyx_n_s_RuntimeError); - Py_VISIT(traverse_module_state->__pyx_n_s_Sequence); - Py_VISIT(traverse_module_state->__pyx_kp_s_Step_may_not_be_zero_axis_d); - Py_VISIT(traverse_module_state->__pyx_kp_u_Tried_to_re_use_a_parser_while_s); - Py_VISIT(traverse_module_state->__pyx_n_s_TypeError); - Py_VISIT(traverse_module_state->__pyx_kp_s_Unable_to_convert_item_to_object); - Py_VISIT(traverse_module_state->__pyx_kp_u_Unknown_Implementation); - Py_VISIT(traverse_module_state->__pyx_n_s_VERSION); - Py_VISIT(traverse_module_state->__pyx_n_s_ValueError); - Py_VISIT(traverse_module_state->__pyx_n_s_View_MemoryView); - Py_VISIT(traverse_module_state->__pyx_kp_u__2); - Py_VISIT(traverse_module_state->__pyx_n_s__3); - Py_VISIT(traverse_module_state->__pyx_n_s__59); - Py_VISIT(traverse_module_state->__pyx_kp_u__6); - Py_VISIT(traverse_module_state->__pyx_kp_u__7); - Py_VISIT(traverse_module_state->__pyx_n_s_abc); - Py_VISIT(traverse_module_state->__pyx_n_s_allocate_buffer); - Py_VISIT(traverse_module_state->__pyx_kp_u_and); - Py_VISIT(traverse_module_state->__pyx_n_s_args); - Py_VISIT(traverse_module_state->__pyx_n_s_as_buffer); - Py_VISIT(traverse_module_state->__pyx_n_s_as_dict); - Py_VISIT(traverse_module_state->__pyx_n_s_as_list); - Py_VISIT(traverse_module_state->__pyx_n_s_asyncio_coroutines); - Py_VISIT(traverse_module_state->__pyx_n_s_at_pointer); - Py_VISIT(traverse_module_state->__pyx_n_s_base); - Py_VISIT(traverse_module_state->__pyx_n_s_bytes_data); - Py_VISIT(traverse_module_state->__pyx_n_s_c); - Py_VISIT(traverse_module_state->__pyx_n_u_c); - Py_VISIT(traverse_module_state->__pyx_n_s_class); - Py_VISIT(traverse_module_state->__pyx_n_s_class_getitem); - Py_VISIT(traverse_module_state->__pyx_n_s_cline_in_traceback); - Py_VISIT(traverse_module_state->__pyx_n_s_close); - Py_VISIT(traverse_module_state->__pyx_n_s_collections); - Py_VISIT(traverse_module_state->__pyx_kp_s_collections_abc); - Py_VISIT(traverse_module_state->__pyx_kp_s_contiguous_and_direct); - Py_VISIT(traverse_module_state->__pyx_kp_s_contiguous_and_indirect); - Py_VISIT(traverse_module_state->__pyx_n_s_count); - Py_VISIT(traverse_module_state->__pyx_n_s_csimdjson); - Py_VISIT(traverse_module_state->__pyx_n_u_d); - Py_VISIT(traverse_module_state->__pyx_n_s_data); - Py_VISIT(traverse_module_state->__pyx_n_s_default); - Py_VISIT(traverse_module_state->__pyx_n_s_dict); - Py_VISIT(traverse_module_state->__pyx_kp_u_disable); - Py_VISIT(traverse_module_state->__pyx_n_s_document); - Py_VISIT(traverse_module_state->__pyx_n_s_dtype_is_object); - Py_VISIT(traverse_module_state->__pyx_kp_u_enable); - Py_VISIT(traverse_module_state->__pyx_n_s_encode); - Py_VISIT(traverse_module_state->__pyx_n_s_enumerate); - Py_VISIT(traverse_module_state->__pyx_n_s_error); - Py_VISIT(traverse_module_state->__pyx_n_s_flags); - Py_VISIT(traverse_module_state->__pyx_n_s_format); - Py_VISIT(traverse_module_state->__pyx_n_s_fortran); - Py_VISIT(traverse_module_state->__pyx_n_u_fortran); - Py_VISIT(traverse_module_state->__pyx_kp_u_gc); - Py_VISIT(traverse_module_state->__pyx_n_s_get); - Py_VISIT(traverse_module_state->__pyx_n_s_get_implementations); - Py_VISIT(traverse_module_state->__pyx_n_s_getstate); - Py_VISIT(traverse_module_state->__pyx_kp_u_got); - Py_VISIT(traverse_module_state->__pyx_kp_u_got_differing_extents_in_dimensi); - Py_VISIT(traverse_module_state->__pyx_n_u_i); - Py_VISIT(traverse_module_state->__pyx_n_s_id); - Py_VISIT(traverse_module_state->__pyx_n_s_impl); - Py_VISIT(traverse_module_state->__pyx_n_s_import); - Py_VISIT(traverse_module_state->__pyx_n_s_index); - Py_VISIT(traverse_module_state->__pyx_n_s_initializing); - Py_VISIT(traverse_module_state->__pyx_n_s_is_coroutine); - Py_VISIT(traverse_module_state->__pyx_kp_u_isenabled); - Py_VISIT(traverse_module_state->__pyx_n_s_it); - Py_VISIT(traverse_module_state->__pyx_n_s_items); - Py_VISIT(traverse_module_state->__pyx_n_s_itemsize); - Py_VISIT(traverse_module_state->__pyx_kp_s_itemsize_0_for_cython_array); - Py_VISIT(traverse_module_state->__pyx_n_s_iter); - Py_VISIT(traverse_module_state->__pyx_n_s_json_pointer); - Py_VISIT(traverse_module_state->__pyx_n_s_key); - Py_VISIT(traverse_module_state->__pyx_n_s_keys); - Py_VISIT(traverse_module_state->__pyx_n_s_load); - Py_VISIT(traverse_module_state->__pyx_n_s_main); - Py_VISIT(traverse_module_state->__pyx_n_s_max_capacity); - Py_VISIT(traverse_module_state->__pyx_n_s_memview); - Py_VISIT(traverse_module_state->__pyx_n_s_mode); - Py_VISIT(traverse_module_state->__pyx_n_s_name); - Py_VISIT(traverse_module_state->__pyx_n_s_name_2); - Py_VISIT(traverse_module_state->__pyx_n_s_ndim); - Py_VISIT(traverse_module_state->__pyx_n_s_new); - Py_VISIT(traverse_module_state->__pyx_kp_s_no_default___reduce___due_to_non); - Py_VISIT(traverse_module_state->__pyx_n_s_obj); - Py_VISIT(traverse_module_state->__pyx_n_s_of_type); - Py_VISIT(traverse_module_state->__pyx_kp_u_of_type_must_be_one_of_d_i_u); - Py_VISIT(traverse_module_state->__pyx_n_s_pack); - Py_VISIT(traverse_module_state->__pyx_n_s_parse); - Py_VISIT(traverse_module_state->__pyx_n_s_path); - Py_VISIT(traverse_module_state->__pyx_n_s_pathlib); - Py_VISIT(traverse_module_state->__pyx_n_s_pickle); - Py_VISIT(traverse_module_state->__pyx_n_s_pyx_PickleError); - Py_VISIT(traverse_module_state->__pyx_n_s_pyx_checksum); - Py_VISIT(traverse_module_state->__pyx_n_s_pyx_result); - Py_VISIT(traverse_module_state->__pyx_n_s_pyx_state); - Py_VISIT(traverse_module_state->__pyx_n_s_pyx_type); - Py_VISIT(traverse_module_state->__pyx_n_s_pyx_unpickle_Enum); - Py_VISIT(traverse_module_state->__pyx_n_s_pyx_vtable); - Py_VISIT(traverse_module_state->__pyx_n_s_range); - Py_VISIT(traverse_module_state->__pyx_n_s_recursive); - Py_VISIT(traverse_module_state->__pyx_n_s_reduce); - Py_VISIT(traverse_module_state->__pyx_n_s_reduce_cython); - Py_VISIT(traverse_module_state->__pyx_n_s_reduce_ex); - Py_VISIT(traverse_module_state->__pyx_n_s_register); - Py_VISIT(traverse_module_state->__pyx_n_s_self); - Py_VISIT(traverse_module_state->__pyx_kp_s_self_c_element_self_c_parser_can); - Py_VISIT(traverse_module_state->__pyx_n_s_send); - Py_VISIT(traverse_module_state->__pyx_n_s_setstate); - Py_VISIT(traverse_module_state->__pyx_n_s_setstate_cython); - Py_VISIT(traverse_module_state->__pyx_n_s_shape); - Py_VISIT(traverse_module_state->__pyx_kp_s_simdjson_csimdjson_pyx); - Py_VISIT(traverse_module_state->__pyx_n_s_size); - Py_VISIT(traverse_module_state->__pyx_n_s_spec); - Py_VISIT(traverse_module_state->__pyx_n_s_src); - Py_VISIT(traverse_module_state->__pyx_n_s_start); - Py_VISIT(traverse_module_state->__pyx_n_s_step); - Py_VISIT(traverse_module_state->__pyx_n_s_stop); - Py_VISIT(traverse_module_state->__pyx_n_s_str_data); - Py_VISIT(traverse_module_state->__pyx_n_s_str_size); - Py_VISIT(traverse_module_state->__pyx_kp_s_strided_and_direct); - Py_VISIT(traverse_module_state->__pyx_kp_s_strided_and_direct_or_indirect); - Py_VISIT(traverse_module_state->__pyx_kp_s_strided_and_indirect); - Py_VISIT(traverse_module_state->__pyx_kp_s_stringsource); - Py_VISIT(traverse_module_state->__pyx_n_s_struct); - Py_VISIT(traverse_module_state->__pyx_n_s_supported_by_runtime); - Py_VISIT(traverse_module_state->__pyx_n_s_sys); - Py_VISIT(traverse_module_state->__pyx_n_s_test); - Py_VISIT(traverse_module_state->__pyx_n_s_throw); - Py_VISIT(traverse_module_state->__pyx_n_u_u); - Py_VISIT(traverse_module_state->__pyx_kp_s_unable_to_allocate_array_data); - Py_VISIT(traverse_module_state->__pyx_kp_s_unable_to_allocate_shape_and_str); - Py_VISIT(traverse_module_state->__pyx_n_s_unpack); - Py_VISIT(traverse_module_state->__pyx_n_s_update); - Py_VISIT(traverse_module_state->__pyx_kp_u_utf_8); - Py_VISIT(traverse_module_state->__pyx_n_s_values); - Py_VISIT(traverse_module_state->__pyx_n_s_version_info); - Py_VISIT(traverse_module_state->__pyx_int_0); - Py_VISIT(traverse_module_state->__pyx_int_1); - Py_VISIT(traverse_module_state->__pyx_int_3); - Py_VISIT(traverse_module_state->__pyx_int_112105877); - Py_VISIT(traverse_module_state->__pyx_int_136983863); - Py_VISIT(traverse_module_state->__pyx_int_184977713); - Py_VISIT(traverse_module_state->__pyx_int_neg_1); - Py_VISIT(traverse_module_state->__pyx_slice__5); - Py_VISIT(traverse_module_state->__pyx_tuple__4); - Py_VISIT(traverse_module_state->__pyx_tuple__8); - Py_VISIT(traverse_module_state->__pyx_tuple__9); - Py_VISIT(traverse_module_state->__pyx_tuple__10); - Py_VISIT(traverse_module_state->__pyx_tuple__14); - Py_VISIT(traverse_module_state->__pyx_tuple__15); - Py_VISIT(traverse_module_state->__pyx_tuple__17); - Py_VISIT(traverse_module_state->__pyx_tuple__18); - Py_VISIT(traverse_module_state->__pyx_tuple__19); - Py_VISIT(traverse_module_state->__pyx_tuple__20); - Py_VISIT(traverse_module_state->__pyx_tuple__21); - Py_VISIT(traverse_module_state->__pyx_tuple__22); - Py_VISIT(traverse_module_state->__pyx_tuple__23); - Py_VISIT(traverse_module_state->__pyx_tuple__24); - Py_VISIT(traverse_module_state->__pyx_tuple__25); - Py_VISIT(traverse_module_state->__pyx_tuple__26); - Py_VISIT(traverse_module_state->__pyx_tuple__27); - Py_VISIT(traverse_module_state->__pyx_tuple__28); - Py_VISIT(traverse_module_state->__pyx_tuple__30); - Py_VISIT(traverse_module_state->__pyx_tuple__32); - Py_VISIT(traverse_module_state->__pyx_tuple__34); - Py_VISIT(traverse_module_state->__pyx_tuple__37); - Py_VISIT(traverse_module_state->__pyx_tuple__41); - Py_VISIT(traverse_module_state->__pyx_tuple__43); - Py_VISIT(traverse_module_state->__pyx_tuple__44); - Py_VISIT(traverse_module_state->__pyx_tuple__45); - Py_VISIT(traverse_module_state->__pyx_tuple__50); - Py_VISIT(traverse_module_state->__pyx_tuple__52); - Py_VISIT(traverse_module_state->__pyx_tuple__53); - Py_VISIT(traverse_module_state->__pyx_tuple__55); - Py_VISIT(traverse_module_state->__pyx_tuple__56); - Py_VISIT(traverse_module_state->__pyx_codeobj__11); - Py_VISIT(traverse_module_state->__pyx_codeobj__12); - Py_VISIT(traverse_module_state->__pyx_codeobj__16); - Py_VISIT(traverse_module_state->__pyx_codeobj__29); - Py_VISIT(traverse_module_state->__pyx_codeobj__31); - Py_VISIT(traverse_module_state->__pyx_codeobj__33); - Py_VISIT(traverse_module_state->__pyx_codeobj__35); - Py_VISIT(traverse_module_state->__pyx_codeobj__36); - Py_VISIT(traverse_module_state->__pyx_codeobj__38); - Py_VISIT(traverse_module_state->__pyx_codeobj__39); - Py_VISIT(traverse_module_state->__pyx_codeobj__40); - Py_VISIT(traverse_module_state->__pyx_codeobj__42); - Py_VISIT(traverse_module_state->__pyx_codeobj__46); - Py_VISIT(traverse_module_state->__pyx_codeobj__47); - Py_VISIT(traverse_module_state->__pyx_codeobj__48); - Py_VISIT(traverse_module_state->__pyx_codeobj__49); - Py_VISIT(traverse_module_state->__pyx_codeobj__51); - Py_VISIT(traverse_module_state->__pyx_codeobj__54); - Py_VISIT(traverse_module_state->__pyx_codeobj__57); - Py_VISIT(traverse_module_state->__pyx_codeobj__58); - return 0; -} -#endif -/* #### Code section: module_state_defines ### */ -#define __pyx_d __pyx_mstate_global->__pyx_d -#define __pyx_b __pyx_mstate_global->__pyx_b -#define __pyx_cython_runtime __pyx_mstate_global->__pyx_cython_runtime -#define __pyx_empty_tuple __pyx_mstate_global->__pyx_empty_tuple -#define __pyx_empty_bytes __pyx_mstate_global->__pyx_empty_bytes -#define __pyx_empty_unicode __pyx_mstate_global->__pyx_empty_unicode -#ifdef __Pyx_CyFunction_USED -#define __pyx_CyFunctionType __pyx_mstate_global->__pyx_CyFunctionType -#endif -#ifdef __Pyx_FusedFunction_USED -#define __pyx_FusedFunctionType __pyx_mstate_global->__pyx_FusedFunctionType -#endif -#ifdef __Pyx_Generator_USED -#define __pyx_GeneratorType __pyx_mstate_global->__pyx_GeneratorType -#endif -#ifdef __Pyx_IterableCoroutine_USED -#define __pyx_IterableCoroutineType __pyx_mstate_global->__pyx_IterableCoroutineType -#endif -#ifdef __Pyx_Coroutine_USED -#define __pyx_CoroutineAwaitType __pyx_mstate_global->__pyx_CoroutineAwaitType -#endif -#ifdef __Pyx_Coroutine_USED -#define __pyx_CoroutineType __pyx_mstate_global->__pyx_CoroutineType -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#define __pyx_ptype_7cpython_4type_type __pyx_mstate_global->__pyx_ptype_7cpython_4type_type -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#endif -#if CYTHON_USE_MODULE_STATE -#define __pyx_type_9csimdjson_ArrayBuffer __pyx_mstate_global->__pyx_type_9csimdjson_ArrayBuffer -#define __pyx_type_9csimdjson_Array __pyx_mstate_global->__pyx_type_9csimdjson_Array -#define __pyx_type_9csimdjson_Object __pyx_mstate_global->__pyx_type_9csimdjson_Object -#define __pyx_type_9csimdjson_Parser __pyx_mstate_global->__pyx_type_9csimdjson_Parser -#define __pyx_type_9csimdjson___pyx_scope_struct____iter__ __pyx_mstate_global->__pyx_type_9csimdjson___pyx_scope_struct____iter__ -#define __pyx_type_9csimdjson___pyx_scope_struct_1___iter__ __pyx_mstate_global->__pyx_type_9csimdjson___pyx_scope_struct_1___iter__ -#define __pyx_type_9csimdjson___pyx_scope_struct_2_values __pyx_mstate_global->__pyx_type_9csimdjson___pyx_scope_struct_2_values -#define __pyx_type_9csimdjson___pyx_scope_struct_3_items __pyx_mstate_global->__pyx_type_9csimdjson___pyx_scope_struct_3_items -#define __pyx_type_9csimdjson___pyx_scope_struct_4_get_implementations __pyx_mstate_global->__pyx_type_9csimdjson___pyx_scope_struct_4_get_implementations -#define __pyx_type___pyx_array __pyx_mstate_global->__pyx_type___pyx_array -#define __pyx_type___pyx_MemviewEnum __pyx_mstate_global->__pyx_type___pyx_MemviewEnum -#define __pyx_type___pyx_memoryview __pyx_mstate_global->__pyx_type___pyx_memoryview -#define __pyx_type___pyx_memoryviewslice __pyx_mstate_global->__pyx_type___pyx_memoryviewslice -#endif -#define __pyx_ptype_9csimdjson_ArrayBuffer __pyx_mstate_global->__pyx_ptype_9csimdjson_ArrayBuffer -#define __pyx_ptype_9csimdjson_Array __pyx_mstate_global->__pyx_ptype_9csimdjson_Array -#define __pyx_ptype_9csimdjson_Object __pyx_mstate_global->__pyx_ptype_9csimdjson_Object -#define __pyx_ptype_9csimdjson_Parser __pyx_mstate_global->__pyx_ptype_9csimdjson_Parser -#define __pyx_ptype_9csimdjson___pyx_scope_struct____iter__ __pyx_mstate_global->__pyx_ptype_9csimdjson___pyx_scope_struct____iter__ -#define __pyx_ptype_9csimdjson___pyx_scope_struct_1___iter__ __pyx_mstate_global->__pyx_ptype_9csimdjson___pyx_scope_struct_1___iter__ -#define __pyx_ptype_9csimdjson___pyx_scope_struct_2_values __pyx_mstate_global->__pyx_ptype_9csimdjson___pyx_scope_struct_2_values -#define __pyx_ptype_9csimdjson___pyx_scope_struct_3_items __pyx_mstate_global->__pyx_ptype_9csimdjson___pyx_scope_struct_3_items -#define __pyx_ptype_9csimdjson___pyx_scope_struct_4_get_implementations __pyx_mstate_global->__pyx_ptype_9csimdjson___pyx_scope_struct_4_get_implementations -#define __pyx_array_type __pyx_mstate_global->__pyx_array_type -#define __pyx_MemviewEnum_type __pyx_mstate_global->__pyx_MemviewEnum_type -#define __pyx_memoryview_type __pyx_mstate_global->__pyx_memoryview_type -#define __pyx_memoryviewslice_type __pyx_mstate_global->__pyx_memoryviewslice_type -#define __pyx_kp_u_ __pyx_mstate_global->__pyx_kp_u_ -#define __pyx_n_s_ASCII __pyx_mstate_global->__pyx_n_s_ASCII -#define __pyx_kp_s_All_dimensions_preceding_dimensi __pyx_mstate_global->__pyx_kp_s_All_dimensions_preceding_dimensi -#define __pyx_n_s_Array __pyx_mstate_global->__pyx_n_s_Array -#define __pyx_n_s_ArrayBuffer __pyx_mstate_global->__pyx_n_s_ArrayBuffer -#define __pyx_n_s_ArrayBuffer___reduce_cython __pyx_mstate_global->__pyx_n_s_ArrayBuffer___reduce_cython -#define __pyx_n_s_ArrayBuffer___setstate_cython __pyx_mstate_global->__pyx_n_s_ArrayBuffer___setstate_cython -#define __pyx_n_s_Array___iter __pyx_mstate_global->__pyx_n_s_Array___iter -#define __pyx_n_s_Array___reduce_cython __pyx_mstate_global->__pyx_n_s_Array___reduce_cython -#define __pyx_n_s_Array___setstate_cython __pyx_mstate_global->__pyx_n_s_Array___setstate_cython -#define __pyx_n_s_Array_as_buffer __pyx_mstate_global->__pyx_n_s_Array_as_buffer -#define __pyx_n_s_Array_as_list __pyx_mstate_global->__pyx_n_s_Array_as_list -#define __pyx_n_s_Array_at_pointer __pyx_mstate_global->__pyx_n_s_Array_at_pointer -#define __pyx_n_s_AssertionError __pyx_mstate_global->__pyx_n_s_AssertionError -#define __pyx_kp_u_Attempted_to_set_a_runtime_Imple __pyx_mstate_global->__pyx_kp_u_Attempted_to_set_a_runtime_Imple -#define __pyx_kp_s_Buffer_view_does_not_expose_stri __pyx_mstate_global->__pyx_kp_s_Buffer_view_does_not_expose_stri -#define __pyx_kp_s_Can_only_create_a_buffer_that_is __pyx_mstate_global->__pyx_kp_s_Can_only_create_a_buffer_that_is -#define __pyx_kp_s_Cannot_assign_to_read_only_memor __pyx_mstate_global->__pyx_kp_s_Cannot_assign_to_read_only_memor -#define __pyx_kp_s_Cannot_create_writable_memory_vi __pyx_mstate_global->__pyx_kp_s_Cannot_create_writable_memory_vi -#define __pyx_kp_u_Cannot_index_with_type __pyx_mstate_global->__pyx_kp_u_Cannot_index_with_type -#define __pyx_kp_s_Cannot_transpose_memoryview_with __pyx_mstate_global->__pyx_kp_s_Cannot_transpose_memoryview_with -#define __pyx_kp_s_Dimension_d_is_not_direct __pyx_mstate_global->__pyx_kp_s_Dimension_d_is_not_direct -#define __pyx_kp_u_EMPTY_no_JSON_found __pyx_mstate_global->__pyx_kp_u_EMPTY_no_JSON_found -#define __pyx_n_s_Ellipsis __pyx_mstate_global->__pyx_n_s_Ellipsis -#define __pyx_kp_s_Empty_shape_tuple_for_cython_arr __pyx_mstate_global->__pyx_kp_s_Empty_shape_tuple_for_cython_arr -#define __pyx_kp_u_Encountered_an_unknown_element_t __pyx_mstate_global->__pyx_kp_u_Encountered_an_unknown_element_t -#define __pyx_kp_s_Incompatible_checksums_0x_x_vs_0 __pyx_mstate_global->__pyx_kp_s_Incompatible_checksums_0x_x_vs_0 -#define __pyx_n_s_IndexError __pyx_mstate_global->__pyx_n_s_IndexError -#define __pyx_kp_s_Index_out_of_bounds_axis_d __pyx_mstate_global->__pyx_kp_s_Index_out_of_bounds_axis_d -#define __pyx_kp_s_Indirect_dimensions_not_supporte __pyx_mstate_global->__pyx_kp_s_Indirect_dimensions_not_supporte -#define __pyx_kp_u_Invalid_mode_expected_c_or_fortr __pyx_mstate_global->__pyx_kp_u_Invalid_mode_expected_c_or_fortr -#define __pyx_kp_u_Invalid_shape_in_axis __pyx_mstate_global->__pyx_kp_u_Invalid_shape_in_axis -#define __pyx_n_s_KeyError __pyx_mstate_global->__pyx_n_s_KeyError -#define __pyx_n_s_MAXSIZE_BYTES __pyx_mstate_global->__pyx_n_s_MAXSIZE_BYTES -#define __pyx_n_s_MemoryError __pyx_mstate_global->__pyx_n_s_MemoryError -#define __pyx_kp_s_MemoryView_of_r_at_0x_x __pyx_mstate_global->__pyx_kp_s_MemoryView_of_r_at_0x_x -#define __pyx_kp_s_MemoryView_of_r_object __pyx_mstate_global->__pyx_kp_s_MemoryView_of_r_object -#define __pyx_n_b_O __pyx_mstate_global->__pyx_n_b_O -#define __pyx_n_s_Object __pyx_mstate_global->__pyx_n_s_Object -#define __pyx_n_s_Object___iter __pyx_mstate_global->__pyx_n_s_Object___iter -#define __pyx_n_s_Object___reduce_cython __pyx_mstate_global->__pyx_n_s_Object___reduce_cython -#define __pyx_n_s_Object___setstate_cython __pyx_mstate_global->__pyx_n_s_Object___setstate_cython -#define __pyx_n_s_Object_as_dict __pyx_mstate_global->__pyx_n_s_Object_as_dict -#define __pyx_n_s_Object_at_pointer __pyx_mstate_global->__pyx_n_s_Object_at_pointer -#define __pyx_n_s_Object_get __pyx_mstate_global->__pyx_n_s_Object_get -#define __pyx_n_s_Object_items __pyx_mstate_global->__pyx_n_s_Object_items -#define __pyx_n_s_Object_values __pyx_mstate_global->__pyx_n_s_Object_values -#define __pyx_kp_u_Out_of_bounds_on_buffer_access_a __pyx_mstate_global->__pyx_kp_u_Out_of_bounds_on_buffer_access_a -#define __pyx_n_s_PADDING __pyx_mstate_global->__pyx_n_s_PADDING -#define __pyx_n_s_Parser __pyx_mstate_global->__pyx_n_s_Parser -#define __pyx_n_s_Parser___reduce_cython __pyx_mstate_global->__pyx_n_s_Parser___reduce_cython -#define __pyx_n_s_Parser___setstate_cython __pyx_mstate_global->__pyx_n_s_Parser___setstate_cython -#define __pyx_n_s_Parser_get_implementations __pyx_mstate_global->__pyx_n_s_Parser_get_implementations -#define __pyx_n_s_Parser_load __pyx_mstate_global->__pyx_n_s_Parser_load -#define __pyx_n_s_Parser_parse __pyx_mstate_global->__pyx_n_s_Parser_parse -#define __pyx_n_s_Path __pyx_mstate_global->__pyx_n_s_Path -#define __pyx_n_s_PickleError __pyx_mstate_global->__pyx_n_s_PickleError -#define __pyx_n_s_RuntimeError __pyx_mstate_global->__pyx_n_s_RuntimeError -#define __pyx_n_s_Sequence __pyx_mstate_global->__pyx_n_s_Sequence -#define __pyx_kp_s_Step_may_not_be_zero_axis_d __pyx_mstate_global->__pyx_kp_s_Step_may_not_be_zero_axis_d -#define __pyx_kp_u_Tried_to_re_use_a_parser_while_s __pyx_mstate_global->__pyx_kp_u_Tried_to_re_use_a_parser_while_s -#define __pyx_n_s_TypeError __pyx_mstate_global->__pyx_n_s_TypeError -#define __pyx_kp_s_Unable_to_convert_item_to_object __pyx_mstate_global->__pyx_kp_s_Unable_to_convert_item_to_object -#define __pyx_kp_u_Unknown_Implementation __pyx_mstate_global->__pyx_kp_u_Unknown_Implementation -#define __pyx_n_s_VERSION __pyx_mstate_global->__pyx_n_s_VERSION -#define __pyx_n_s_ValueError __pyx_mstate_global->__pyx_n_s_ValueError -#define __pyx_n_s_View_MemoryView __pyx_mstate_global->__pyx_n_s_View_MemoryView -#define __pyx_kp_u__2 __pyx_mstate_global->__pyx_kp_u__2 -#define __pyx_n_s__3 __pyx_mstate_global->__pyx_n_s__3 -#define __pyx_n_s__59 __pyx_mstate_global->__pyx_n_s__59 -#define __pyx_kp_u__6 __pyx_mstate_global->__pyx_kp_u__6 -#define __pyx_kp_u__7 __pyx_mstate_global->__pyx_kp_u__7 -#define __pyx_n_s_abc __pyx_mstate_global->__pyx_n_s_abc -#define __pyx_n_s_allocate_buffer __pyx_mstate_global->__pyx_n_s_allocate_buffer -#define __pyx_kp_u_and __pyx_mstate_global->__pyx_kp_u_and -#define __pyx_n_s_args __pyx_mstate_global->__pyx_n_s_args -#define __pyx_n_s_as_buffer __pyx_mstate_global->__pyx_n_s_as_buffer -#define __pyx_n_s_as_dict __pyx_mstate_global->__pyx_n_s_as_dict -#define __pyx_n_s_as_list __pyx_mstate_global->__pyx_n_s_as_list -#define __pyx_n_s_asyncio_coroutines __pyx_mstate_global->__pyx_n_s_asyncio_coroutines -#define __pyx_n_s_at_pointer __pyx_mstate_global->__pyx_n_s_at_pointer -#define __pyx_n_s_base __pyx_mstate_global->__pyx_n_s_base -#define __pyx_n_s_bytes_data __pyx_mstate_global->__pyx_n_s_bytes_data -#define __pyx_n_s_c __pyx_mstate_global->__pyx_n_s_c -#define __pyx_n_u_c __pyx_mstate_global->__pyx_n_u_c -#define __pyx_n_s_class __pyx_mstate_global->__pyx_n_s_class -#define __pyx_n_s_class_getitem __pyx_mstate_global->__pyx_n_s_class_getitem -#define __pyx_n_s_cline_in_traceback __pyx_mstate_global->__pyx_n_s_cline_in_traceback -#define __pyx_n_s_close __pyx_mstate_global->__pyx_n_s_close -#define __pyx_n_s_collections __pyx_mstate_global->__pyx_n_s_collections -#define __pyx_kp_s_collections_abc __pyx_mstate_global->__pyx_kp_s_collections_abc -#define __pyx_kp_s_contiguous_and_direct __pyx_mstate_global->__pyx_kp_s_contiguous_and_direct -#define __pyx_kp_s_contiguous_and_indirect __pyx_mstate_global->__pyx_kp_s_contiguous_and_indirect -#define __pyx_n_s_count __pyx_mstate_global->__pyx_n_s_count -#define __pyx_n_s_csimdjson __pyx_mstate_global->__pyx_n_s_csimdjson -#define __pyx_n_u_d __pyx_mstate_global->__pyx_n_u_d -#define __pyx_n_s_data __pyx_mstate_global->__pyx_n_s_data -#define __pyx_n_s_default __pyx_mstate_global->__pyx_n_s_default -#define __pyx_n_s_dict __pyx_mstate_global->__pyx_n_s_dict -#define __pyx_kp_u_disable __pyx_mstate_global->__pyx_kp_u_disable -#define __pyx_n_s_document __pyx_mstate_global->__pyx_n_s_document -#define __pyx_n_s_dtype_is_object __pyx_mstate_global->__pyx_n_s_dtype_is_object -#define __pyx_kp_u_enable __pyx_mstate_global->__pyx_kp_u_enable -#define __pyx_n_s_encode __pyx_mstate_global->__pyx_n_s_encode -#define __pyx_n_s_enumerate __pyx_mstate_global->__pyx_n_s_enumerate -#define __pyx_n_s_error __pyx_mstate_global->__pyx_n_s_error -#define __pyx_n_s_flags __pyx_mstate_global->__pyx_n_s_flags -#define __pyx_n_s_format __pyx_mstate_global->__pyx_n_s_format -#define __pyx_n_s_fortran __pyx_mstate_global->__pyx_n_s_fortran -#define __pyx_n_u_fortran __pyx_mstate_global->__pyx_n_u_fortran -#define __pyx_kp_u_gc __pyx_mstate_global->__pyx_kp_u_gc -#define __pyx_n_s_get __pyx_mstate_global->__pyx_n_s_get -#define __pyx_n_s_get_implementations __pyx_mstate_global->__pyx_n_s_get_implementations -#define __pyx_n_s_getstate __pyx_mstate_global->__pyx_n_s_getstate -#define __pyx_kp_u_got __pyx_mstate_global->__pyx_kp_u_got -#define __pyx_kp_u_got_differing_extents_in_dimensi __pyx_mstate_global->__pyx_kp_u_got_differing_extents_in_dimensi -#define __pyx_n_u_i __pyx_mstate_global->__pyx_n_u_i -#define __pyx_n_s_id __pyx_mstate_global->__pyx_n_s_id -#define __pyx_n_s_impl __pyx_mstate_global->__pyx_n_s_impl -#define __pyx_n_s_import __pyx_mstate_global->__pyx_n_s_import -#define __pyx_n_s_index __pyx_mstate_global->__pyx_n_s_index -#define __pyx_n_s_initializing __pyx_mstate_global->__pyx_n_s_initializing -#define __pyx_n_s_is_coroutine __pyx_mstate_global->__pyx_n_s_is_coroutine -#define __pyx_kp_u_isenabled __pyx_mstate_global->__pyx_kp_u_isenabled -#define __pyx_n_s_it __pyx_mstate_global->__pyx_n_s_it -#define __pyx_n_s_items __pyx_mstate_global->__pyx_n_s_items -#define __pyx_n_s_itemsize __pyx_mstate_global->__pyx_n_s_itemsize -#define __pyx_kp_s_itemsize_0_for_cython_array __pyx_mstate_global->__pyx_kp_s_itemsize_0_for_cython_array -#define __pyx_n_s_iter __pyx_mstate_global->__pyx_n_s_iter -#define __pyx_n_s_json_pointer __pyx_mstate_global->__pyx_n_s_json_pointer -#define __pyx_n_s_key __pyx_mstate_global->__pyx_n_s_key -#define __pyx_n_s_keys __pyx_mstate_global->__pyx_n_s_keys -#define __pyx_n_s_load __pyx_mstate_global->__pyx_n_s_load -#define __pyx_n_s_main __pyx_mstate_global->__pyx_n_s_main -#define __pyx_n_s_max_capacity __pyx_mstate_global->__pyx_n_s_max_capacity -#define __pyx_n_s_memview __pyx_mstate_global->__pyx_n_s_memview -#define __pyx_n_s_mode __pyx_mstate_global->__pyx_n_s_mode -#define __pyx_n_s_name __pyx_mstate_global->__pyx_n_s_name -#define __pyx_n_s_name_2 __pyx_mstate_global->__pyx_n_s_name_2 -#define __pyx_n_s_ndim __pyx_mstate_global->__pyx_n_s_ndim -#define __pyx_n_s_new __pyx_mstate_global->__pyx_n_s_new -#define __pyx_kp_s_no_default___reduce___due_to_non __pyx_mstate_global->__pyx_kp_s_no_default___reduce___due_to_non -#define __pyx_n_s_obj __pyx_mstate_global->__pyx_n_s_obj -#define __pyx_n_s_of_type __pyx_mstate_global->__pyx_n_s_of_type -#define __pyx_kp_u_of_type_must_be_one_of_d_i_u __pyx_mstate_global->__pyx_kp_u_of_type_must_be_one_of_d_i_u -#define __pyx_n_s_pack __pyx_mstate_global->__pyx_n_s_pack -#define __pyx_n_s_parse __pyx_mstate_global->__pyx_n_s_parse -#define __pyx_n_s_path __pyx_mstate_global->__pyx_n_s_path -#define __pyx_n_s_pathlib __pyx_mstate_global->__pyx_n_s_pathlib -#define __pyx_n_s_pickle __pyx_mstate_global->__pyx_n_s_pickle -#define __pyx_n_s_pyx_PickleError __pyx_mstate_global->__pyx_n_s_pyx_PickleError -#define __pyx_n_s_pyx_checksum __pyx_mstate_global->__pyx_n_s_pyx_checksum -#define __pyx_n_s_pyx_result __pyx_mstate_global->__pyx_n_s_pyx_result -#define __pyx_n_s_pyx_state __pyx_mstate_global->__pyx_n_s_pyx_state -#define __pyx_n_s_pyx_type __pyx_mstate_global->__pyx_n_s_pyx_type -#define __pyx_n_s_pyx_unpickle_Enum __pyx_mstate_global->__pyx_n_s_pyx_unpickle_Enum -#define __pyx_n_s_pyx_vtable __pyx_mstate_global->__pyx_n_s_pyx_vtable -#define __pyx_n_s_range __pyx_mstate_global->__pyx_n_s_range -#define __pyx_n_s_recursive __pyx_mstate_global->__pyx_n_s_recursive -#define __pyx_n_s_reduce __pyx_mstate_global->__pyx_n_s_reduce -#define __pyx_n_s_reduce_cython __pyx_mstate_global->__pyx_n_s_reduce_cython -#define __pyx_n_s_reduce_ex __pyx_mstate_global->__pyx_n_s_reduce_ex -#define __pyx_n_s_register __pyx_mstate_global->__pyx_n_s_register -#define __pyx_n_s_self __pyx_mstate_global->__pyx_n_s_self -#define __pyx_kp_s_self_c_element_self_c_parser_can __pyx_mstate_global->__pyx_kp_s_self_c_element_self_c_parser_can -#define __pyx_n_s_send __pyx_mstate_global->__pyx_n_s_send -#define __pyx_n_s_setstate __pyx_mstate_global->__pyx_n_s_setstate -#define __pyx_n_s_setstate_cython __pyx_mstate_global->__pyx_n_s_setstate_cython -#define __pyx_n_s_shape __pyx_mstate_global->__pyx_n_s_shape -#define __pyx_kp_s_simdjson_csimdjson_pyx __pyx_mstate_global->__pyx_kp_s_simdjson_csimdjson_pyx -#define __pyx_n_s_size __pyx_mstate_global->__pyx_n_s_size -#define __pyx_n_s_spec __pyx_mstate_global->__pyx_n_s_spec -#define __pyx_n_s_src __pyx_mstate_global->__pyx_n_s_src -#define __pyx_n_s_start __pyx_mstate_global->__pyx_n_s_start -#define __pyx_n_s_step __pyx_mstate_global->__pyx_n_s_step -#define __pyx_n_s_stop __pyx_mstate_global->__pyx_n_s_stop -#define __pyx_n_s_str_data __pyx_mstate_global->__pyx_n_s_str_data -#define __pyx_n_s_str_size __pyx_mstate_global->__pyx_n_s_str_size -#define __pyx_kp_s_strided_and_direct __pyx_mstate_global->__pyx_kp_s_strided_and_direct -#define __pyx_kp_s_strided_and_direct_or_indirect __pyx_mstate_global->__pyx_kp_s_strided_and_direct_or_indirect -#define __pyx_kp_s_strided_and_indirect __pyx_mstate_global->__pyx_kp_s_strided_and_indirect -#define __pyx_kp_s_stringsource __pyx_mstate_global->__pyx_kp_s_stringsource -#define __pyx_n_s_struct __pyx_mstate_global->__pyx_n_s_struct -#define __pyx_n_s_supported_by_runtime __pyx_mstate_global->__pyx_n_s_supported_by_runtime -#define __pyx_n_s_sys __pyx_mstate_global->__pyx_n_s_sys -#define __pyx_n_s_test __pyx_mstate_global->__pyx_n_s_test -#define __pyx_n_s_throw __pyx_mstate_global->__pyx_n_s_throw -#define __pyx_n_u_u __pyx_mstate_global->__pyx_n_u_u -#define __pyx_kp_s_unable_to_allocate_array_data __pyx_mstate_global->__pyx_kp_s_unable_to_allocate_array_data -#define __pyx_kp_s_unable_to_allocate_shape_and_str __pyx_mstate_global->__pyx_kp_s_unable_to_allocate_shape_and_str -#define __pyx_n_s_unpack __pyx_mstate_global->__pyx_n_s_unpack -#define __pyx_n_s_update __pyx_mstate_global->__pyx_n_s_update -#define __pyx_kp_u_utf_8 __pyx_mstate_global->__pyx_kp_u_utf_8 -#define __pyx_n_s_values __pyx_mstate_global->__pyx_n_s_values -#define __pyx_n_s_version_info __pyx_mstate_global->__pyx_n_s_version_info -#define __pyx_int_0 __pyx_mstate_global->__pyx_int_0 -#define __pyx_int_1 __pyx_mstate_global->__pyx_int_1 -#define __pyx_int_3 __pyx_mstate_global->__pyx_int_3 -#define __pyx_int_112105877 __pyx_mstate_global->__pyx_int_112105877 -#define __pyx_int_136983863 __pyx_mstate_global->__pyx_int_136983863 -#define __pyx_int_184977713 __pyx_mstate_global->__pyx_int_184977713 -#define __pyx_int_neg_1 __pyx_mstate_global->__pyx_int_neg_1 -#define __pyx_k__13 __pyx_mstate_global->__pyx_k__13 -#define __pyx_slice__5 __pyx_mstate_global->__pyx_slice__5 -#define __pyx_tuple__4 __pyx_mstate_global->__pyx_tuple__4 -#define __pyx_tuple__8 __pyx_mstate_global->__pyx_tuple__8 -#define __pyx_tuple__9 __pyx_mstate_global->__pyx_tuple__9 -#define __pyx_tuple__10 __pyx_mstate_global->__pyx_tuple__10 -#define __pyx_tuple__14 __pyx_mstate_global->__pyx_tuple__14 -#define __pyx_tuple__15 __pyx_mstate_global->__pyx_tuple__15 -#define __pyx_tuple__17 __pyx_mstate_global->__pyx_tuple__17 -#define __pyx_tuple__18 __pyx_mstate_global->__pyx_tuple__18 -#define __pyx_tuple__19 __pyx_mstate_global->__pyx_tuple__19 -#define __pyx_tuple__20 __pyx_mstate_global->__pyx_tuple__20 -#define __pyx_tuple__21 __pyx_mstate_global->__pyx_tuple__21 -#define __pyx_tuple__22 __pyx_mstate_global->__pyx_tuple__22 -#define __pyx_tuple__23 __pyx_mstate_global->__pyx_tuple__23 -#define __pyx_tuple__24 __pyx_mstate_global->__pyx_tuple__24 -#define __pyx_tuple__25 __pyx_mstate_global->__pyx_tuple__25 -#define __pyx_tuple__26 __pyx_mstate_global->__pyx_tuple__26 -#define __pyx_tuple__27 __pyx_mstate_global->__pyx_tuple__27 -#define __pyx_tuple__28 __pyx_mstate_global->__pyx_tuple__28 -#define __pyx_tuple__30 __pyx_mstate_global->__pyx_tuple__30 -#define __pyx_tuple__32 __pyx_mstate_global->__pyx_tuple__32 -#define __pyx_tuple__34 __pyx_mstate_global->__pyx_tuple__34 -#define __pyx_tuple__37 __pyx_mstate_global->__pyx_tuple__37 -#define __pyx_tuple__41 __pyx_mstate_global->__pyx_tuple__41 -#define __pyx_tuple__43 __pyx_mstate_global->__pyx_tuple__43 -#define __pyx_tuple__44 __pyx_mstate_global->__pyx_tuple__44 -#define __pyx_tuple__45 __pyx_mstate_global->__pyx_tuple__45 -#define __pyx_tuple__50 __pyx_mstate_global->__pyx_tuple__50 -#define __pyx_tuple__52 __pyx_mstate_global->__pyx_tuple__52 -#define __pyx_tuple__53 __pyx_mstate_global->__pyx_tuple__53 -#define __pyx_tuple__55 __pyx_mstate_global->__pyx_tuple__55 -#define __pyx_tuple__56 __pyx_mstate_global->__pyx_tuple__56 -#define __pyx_codeobj__11 __pyx_mstate_global->__pyx_codeobj__11 -#define __pyx_codeobj__12 __pyx_mstate_global->__pyx_codeobj__12 -#define __pyx_codeobj__16 __pyx_mstate_global->__pyx_codeobj__16 -#define __pyx_codeobj__29 __pyx_mstate_global->__pyx_codeobj__29 -#define __pyx_codeobj__31 __pyx_mstate_global->__pyx_codeobj__31 -#define __pyx_codeobj__33 __pyx_mstate_global->__pyx_codeobj__33 -#define __pyx_codeobj__35 __pyx_mstate_global->__pyx_codeobj__35 -#define __pyx_codeobj__36 __pyx_mstate_global->__pyx_codeobj__36 -#define __pyx_codeobj__38 __pyx_mstate_global->__pyx_codeobj__38 -#define __pyx_codeobj__39 __pyx_mstate_global->__pyx_codeobj__39 -#define __pyx_codeobj__40 __pyx_mstate_global->__pyx_codeobj__40 -#define __pyx_codeobj__42 __pyx_mstate_global->__pyx_codeobj__42 -#define __pyx_codeobj__46 __pyx_mstate_global->__pyx_codeobj__46 -#define __pyx_codeobj__47 __pyx_mstate_global->__pyx_codeobj__47 -#define __pyx_codeobj__48 __pyx_mstate_global->__pyx_codeobj__48 -#define __pyx_codeobj__49 __pyx_mstate_global->__pyx_codeobj__49 -#define __pyx_codeobj__51 __pyx_mstate_global->__pyx_codeobj__51 -#define __pyx_codeobj__54 __pyx_mstate_global->__pyx_codeobj__54 -#define __pyx_codeobj__57 __pyx_mstate_global->__pyx_codeobj__57 -#define __pyx_codeobj__58 __pyx_mstate_global->__pyx_codeobj__58 -/* #### Code section: module_code ### */ - -/* "string.to_py":31 - * - * @cname("__pyx_convert_PyObject_string_to_py_std__in_string") - * cdef inline object __pyx_convert_PyObject_string_to_py_std__in_string(const string& s): # <<<<<<<<<<<<<< - * return __Pyx_PyObject_FromStringAndSize(s.data(), s.size()) - * cdef extern from *: - */ - -static CYTHON_INLINE PyObject *__pyx_convert_PyObject_string_to_py_std__in_string(std::string const &__pyx_v_s) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__pyx_convert_PyObject_string_to_py_std__in_string", 0); - - /* "string.to_py":32 - * @cname("__pyx_convert_PyObject_string_to_py_std__in_string") - * cdef inline object __pyx_convert_PyObject_string_to_py_std__in_string(const string& s): - * return __Pyx_PyObject_FromStringAndSize(s.data(), s.size()) # <<<<<<<<<<<<<< - * cdef extern from *: - * cdef object __Pyx_PyUnicode_FromStringAndSize(const char*, size_t) - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyObject_FromStringAndSize(__pyx_v_s.data(), __pyx_v_s.size()); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 32, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "string.to_py":31 - * - * @cname("__pyx_convert_PyObject_string_to_py_std__in_string") - * cdef inline object __pyx_convert_PyObject_string_to_py_std__in_string(const string& s): # <<<<<<<<<<<<<< - * return __Pyx_PyObject_FromStringAndSize(s.data(), s.size()) - * cdef extern from *: - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("string.to_py.__pyx_convert_PyObject_string_to_py_std__in_string", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "string.to_py":37 - * - * @cname("__pyx_convert_PyUnicode_string_to_py_std__in_string") - * cdef inline object __pyx_convert_PyUnicode_string_to_py_std__in_string(const string& s): # <<<<<<<<<<<<<< - * return __Pyx_PyUnicode_FromStringAndSize(s.data(), s.size()) - * cdef extern from *: - */ - -static CYTHON_INLINE PyObject *__pyx_convert_PyUnicode_string_to_py_std__in_string(std::string const &__pyx_v_s) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__pyx_convert_PyUnicode_string_to_py_std__in_string", 0); - - /* "string.to_py":38 - * @cname("__pyx_convert_PyUnicode_string_to_py_std__in_string") - * cdef inline object __pyx_convert_PyUnicode_string_to_py_std__in_string(const string& s): - * return __Pyx_PyUnicode_FromStringAndSize(s.data(), s.size()) # <<<<<<<<<<<<<< - * cdef extern from *: - * cdef object __Pyx_PyStr_FromStringAndSize(const char*, size_t) - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyUnicode_FromStringAndSize(__pyx_v_s.data(), __pyx_v_s.size()); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 38, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "string.to_py":37 - * - * @cname("__pyx_convert_PyUnicode_string_to_py_std__in_string") - * cdef inline object __pyx_convert_PyUnicode_string_to_py_std__in_string(const string& s): # <<<<<<<<<<<<<< - * return __Pyx_PyUnicode_FromStringAndSize(s.data(), s.size()) - * cdef extern from *: - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("string.to_py.__pyx_convert_PyUnicode_string_to_py_std__in_string", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "string.to_py":43 - * - * @cname("__pyx_convert_PyStr_string_to_py_std__in_string") - * cdef inline object __pyx_convert_PyStr_string_to_py_std__in_string(const string& s): # <<<<<<<<<<<<<< - * return __Pyx_PyStr_FromStringAndSize(s.data(), s.size()) - * cdef extern from *: - */ - -static CYTHON_INLINE PyObject *__pyx_convert_PyStr_string_to_py_std__in_string(std::string const &__pyx_v_s) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__pyx_convert_PyStr_string_to_py_std__in_string", 0); - - /* "string.to_py":44 - * @cname("__pyx_convert_PyStr_string_to_py_std__in_string") - * cdef inline object __pyx_convert_PyStr_string_to_py_std__in_string(const string& s): - * return __Pyx_PyStr_FromStringAndSize(s.data(), s.size()) # <<<<<<<<<<<<<< - * cdef extern from *: - * cdef object __Pyx_PyBytes_FromStringAndSize(const char*, size_t) - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyStr_FromStringAndSize(__pyx_v_s.data(), __pyx_v_s.size()); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 44, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "string.to_py":43 - * - * @cname("__pyx_convert_PyStr_string_to_py_std__in_string") - * cdef inline object __pyx_convert_PyStr_string_to_py_std__in_string(const string& s): # <<<<<<<<<<<<<< - * return __Pyx_PyStr_FromStringAndSize(s.data(), s.size()) - * cdef extern from *: - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("string.to_py.__pyx_convert_PyStr_string_to_py_std__in_string", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "string.to_py":49 - * - * @cname("__pyx_convert_PyBytes_string_to_py_std__in_string") - * cdef inline object __pyx_convert_PyBytes_string_to_py_std__in_string(const string& s): # <<<<<<<<<<<<<< - * return __Pyx_PyBytes_FromStringAndSize(s.data(), s.size()) - * cdef extern from *: - */ - -static CYTHON_INLINE PyObject *__pyx_convert_PyBytes_string_to_py_std__in_string(std::string const &__pyx_v_s) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__pyx_convert_PyBytes_string_to_py_std__in_string", 0); - - /* "string.to_py":50 - * @cname("__pyx_convert_PyBytes_string_to_py_std__in_string") - * cdef inline object __pyx_convert_PyBytes_string_to_py_std__in_string(const string& s): - * return __Pyx_PyBytes_FromStringAndSize(s.data(), s.size()) # <<<<<<<<<<<<<< - * cdef extern from *: - * cdef object __Pyx_PyByteArray_FromStringAndSize(const char*, size_t) - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyBytes_FromStringAndSize(__pyx_v_s.data(), __pyx_v_s.size()); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 50, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "string.to_py":49 - * - * @cname("__pyx_convert_PyBytes_string_to_py_std__in_string") - * cdef inline object __pyx_convert_PyBytes_string_to_py_std__in_string(const string& s): # <<<<<<<<<<<<<< - * return __Pyx_PyBytes_FromStringAndSize(s.data(), s.size()) - * cdef extern from *: - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("string.to_py.__pyx_convert_PyBytes_string_to_py_std__in_string", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "string.to_py":55 - * - * @cname("__pyx_convert_PyByteArray_string_to_py_std__in_string") - * cdef inline object __pyx_convert_PyByteArray_string_to_py_std__in_string(const string& s): # <<<<<<<<<<<<<< - * return __Pyx_PyByteArray_FromStringAndSize(s.data(), s.size()) - * - */ - -static CYTHON_INLINE PyObject *__pyx_convert_PyByteArray_string_to_py_std__in_string(std::string const &__pyx_v_s) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__pyx_convert_PyByteArray_string_to_py_std__in_string", 0); - - /* "string.to_py":56 - * @cname("__pyx_convert_PyByteArray_string_to_py_std__in_string") - * cdef inline object __pyx_convert_PyByteArray_string_to_py_std__in_string(const string& s): - * return __Pyx_PyByteArray_FromStringAndSize(s.data(), s.size()) # <<<<<<<<<<<<<< - * - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyByteArray_FromStringAndSize(__pyx_v_s.data(), __pyx_v_s.size()); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 56, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "string.to_py":55 - * - * @cname("__pyx_convert_PyByteArray_string_to_py_std__in_string") - * cdef inline object __pyx_convert_PyByteArray_string_to_py_std__in_string(const string& s): # <<<<<<<<<<<<<< - * return __Pyx_PyByteArray_FromStringAndSize(s.data(), s.size()) - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("string.to_py.__pyx_convert_PyByteArray_string_to_py_std__in_string", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":131 - * cdef bint dtype_is_object - * - * def __cinit__(array self, tuple shape, Py_ssize_t itemsize, format not None, # <<<<<<<<<<<<<< - * mode="c", bint allocate_buffer=True): - * - */ - -/* Python wrapper */ -static int __pyx_array___cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static int __pyx_array___cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_shape = 0; - Py_ssize_t __pyx_v_itemsize; - PyObject *__pyx_v_format = 0; - PyObject *__pyx_v_mode = 0; - int __pyx_v_allocate_buffer; - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[5] = {0,0,0,0,0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0); - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 131, __pyx_L3_error) - #endif - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_shape,&__pyx_n_s_itemsize,&__pyx_n_s_format,&__pyx_n_s_mode,&__pyx_n_s_allocate_buffer,0}; - values[3] = __Pyx_Arg_NewRef_VARARGS(((PyObject *)__pyx_n_s_c)); - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 5: values[4] = __Pyx_Arg_VARARGS(__pyx_args, 4); - CYTHON_FALLTHROUGH; - case 4: values[3] = __Pyx_Arg_VARARGS(__pyx_args, 3); - CYTHON_FALLTHROUGH; - case 3: values[2] = __Pyx_Arg_VARARGS(__pyx_args, 2); - CYTHON_FALLTHROUGH; - case 2: values[1] = __Pyx_Arg_VARARGS(__pyx_args, 1); - CYTHON_FALLTHROUGH; - case 1: values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_VARARGS(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_shape)) != 0)) { - (void)__Pyx_Arg_NewRef_VARARGS(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 131, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - CYTHON_FALLTHROUGH; - case 1: - if (likely((values[1] = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_itemsize)) != 0)) { - (void)__Pyx_Arg_NewRef_VARARGS(values[1]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 131, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 3, 5, 1); __PYX_ERR(1, 131, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 2: - if (likely((values[2] = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_format)) != 0)) { - (void)__Pyx_Arg_NewRef_VARARGS(values[2]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 131, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 3, 5, 2); __PYX_ERR(1, 131, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 3: - if (kw_args > 0) { - PyObject* value = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_mode); - if (value) { values[3] = __Pyx_Arg_NewRef_VARARGS(value); kw_args--; } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 131, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 4: - if (kw_args > 0) { - PyObject* value = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_allocate_buffer); - if (value) { values[4] = __Pyx_Arg_NewRef_VARARGS(value); kw_args--; } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 131, __pyx_L3_error) - } - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__cinit__") < 0)) __PYX_ERR(1, 131, __pyx_L3_error) - } - } else { - switch (__pyx_nargs) { - case 5: values[4] = __Pyx_Arg_VARARGS(__pyx_args, 4); - CYTHON_FALLTHROUGH; - case 4: values[3] = __Pyx_Arg_VARARGS(__pyx_args, 3); - CYTHON_FALLTHROUGH; - case 3: values[2] = __Pyx_Arg_VARARGS(__pyx_args, 2); - values[1] = __Pyx_Arg_VARARGS(__pyx_args, 1); - values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - __pyx_v_shape = ((PyObject*)values[0]); - __pyx_v_itemsize = __Pyx_PyIndex_AsSsize_t(values[1]); if (unlikely((__pyx_v_itemsize == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 131, __pyx_L3_error) - __pyx_v_format = values[2]; - __pyx_v_mode = values[3]; - if (values[4]) { - __pyx_v_allocate_buffer = __Pyx_PyObject_IsTrue(values[4]); if (unlikely((__pyx_v_allocate_buffer == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 132, __pyx_L3_error) - } else { - - /* "View.MemoryView":132 - * - * def __cinit__(array self, tuple shape, Py_ssize_t itemsize, format not None, - * mode="c", bint allocate_buffer=True): # <<<<<<<<<<<<<< - * - * cdef int idx - */ - __pyx_v_allocate_buffer = ((int)1); - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 3, 5, __pyx_nargs); __PYX_ERR(1, 131, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("View.MemoryView.array.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return -1; - __pyx_L4_argument_unpacking_done:; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_shape), (&PyTuple_Type), 1, "shape", 1))) __PYX_ERR(1, 131, __pyx_L1_error) - if (unlikely(((PyObject *)__pyx_v_format) == Py_None)) { - PyErr_Format(PyExc_TypeError, "Argument '%.200s' must not be None", "format"); __PYX_ERR(1, 131, __pyx_L1_error) - } - __pyx_r = __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(((struct __pyx_array_obj *)__pyx_v_self), __pyx_v_shape, __pyx_v_itemsize, __pyx_v_format, __pyx_v_mode, __pyx_v_allocate_buffer); - - /* "View.MemoryView":131 - * cdef bint dtype_is_object - * - * def __cinit__(array self, tuple shape, Py_ssize_t itemsize, format not None, # <<<<<<<<<<<<<< - * mode="c", bint allocate_buffer=True): - * - */ - - /* function exit code */ - goto __pyx_L0; - __pyx_L1_error:; - __pyx_r = -1; - __pyx_L0:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_shape, Py_ssize_t __pyx_v_itemsize, PyObject *__pyx_v_format, PyObject *__pyx_v_mode, int __pyx_v_allocate_buffer) { - int __pyx_v_idx; - Py_ssize_t __pyx_v_dim; - char __pyx_v_order; - int __pyx_r; - __Pyx_RefNannyDeclarations - Py_ssize_t __pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - PyObject *__pyx_t_4 = NULL; - PyObject *__pyx_t_5 = NULL; - PyObject *__pyx_t_6 = NULL; - int __pyx_t_7; - char *__pyx_t_8; - Py_ssize_t __pyx_t_9; - Py_UCS4 __pyx_t_10; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__cinit__", 0); - __Pyx_INCREF(__pyx_v_format); - - /* "View.MemoryView":137 - * cdef Py_ssize_t dim - * - * self.ndim = len(shape) # <<<<<<<<<<<<<< - * self.itemsize = itemsize - * - */ - if (unlikely(__pyx_v_shape == Py_None)) { - PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); - __PYX_ERR(1, 137, __pyx_L1_error) - } - __pyx_t_1 = __Pyx_PyTuple_GET_SIZE(__pyx_v_shape); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(1, 137, __pyx_L1_error) - __pyx_v_self->ndim = ((int)__pyx_t_1); - - /* "View.MemoryView":138 - * - * self.ndim = len(shape) - * self.itemsize = itemsize # <<<<<<<<<<<<<< - * - * if not self.ndim: - */ - __pyx_v_self->itemsize = __pyx_v_itemsize; - - /* "View.MemoryView":140 - * self.itemsize = itemsize - * - * if not self.ndim: # <<<<<<<<<<<<<< - * raise ValueError, "Empty shape tuple for cython.array" - * - */ - __pyx_t_2 = (!(__pyx_v_self->ndim != 0)); - if (unlikely(__pyx_t_2)) { - - /* "View.MemoryView":141 - * - * if not self.ndim: - * raise ValueError, "Empty shape tuple for cython.array" # <<<<<<<<<<<<<< - * - * if itemsize <= 0: - */ - __Pyx_Raise(__pyx_builtin_ValueError, __pyx_kp_s_Empty_shape_tuple_for_cython_arr, 0, 0); - __PYX_ERR(1, 141, __pyx_L1_error) - - /* "View.MemoryView":140 - * self.itemsize = itemsize - * - * if not self.ndim: # <<<<<<<<<<<<<< - * raise ValueError, "Empty shape tuple for cython.array" - * - */ - } - - /* "View.MemoryView":143 - * raise ValueError, "Empty shape tuple for cython.array" - * - * if itemsize <= 0: # <<<<<<<<<<<<<< - * raise ValueError, "itemsize <= 0 for cython.array" - * - */ - __pyx_t_2 = (__pyx_v_itemsize <= 0); - if (unlikely(__pyx_t_2)) { - - /* "View.MemoryView":144 - * - * if itemsize <= 0: - * raise ValueError, "itemsize <= 0 for cython.array" # <<<<<<<<<<<<<< - * - * if not isinstance(format, bytes): - */ - __Pyx_Raise(__pyx_builtin_ValueError, __pyx_kp_s_itemsize_0_for_cython_array, 0, 0); - __PYX_ERR(1, 144, __pyx_L1_error) - - /* "View.MemoryView":143 - * raise ValueError, "Empty shape tuple for cython.array" - * - * if itemsize <= 0: # <<<<<<<<<<<<<< - * raise ValueError, "itemsize <= 0 for cython.array" - * - */ - } - - /* "View.MemoryView":146 - * raise ValueError, "itemsize <= 0 for cython.array" - * - * if not isinstance(format, bytes): # <<<<<<<<<<<<<< - * format = format.encode('ASCII') - * self._format = format # keep a reference to the byte string - */ - __pyx_t_2 = PyBytes_Check(__pyx_v_format); - __pyx_t_3 = (!__pyx_t_2); - if (__pyx_t_3) { - - /* "View.MemoryView":147 - * - * if not isinstance(format, bytes): - * format = format.encode('ASCII') # <<<<<<<<<<<<<< - * self._format = format # keep a reference to the byte string - * self.format = self._format - */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_format, __pyx_n_s_encode); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 147, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_6 = NULL; - __pyx_t_7 = 0; - #if CYTHON_UNPACK_METHODS - if (likely(PyMethod_Check(__pyx_t_5))) { - __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_5); - if (likely(__pyx_t_6)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5); - __Pyx_INCREF(__pyx_t_6); - __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_5, function); - __pyx_t_7 = 1; - } - } - #endif - { - PyObject *__pyx_callargs[2] = {__pyx_t_6, __pyx_n_s_ASCII}; - __pyx_t_4 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_7, 1+__pyx_t_7); - __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; - if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 147, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - } - __Pyx_DECREF_SET(__pyx_v_format, __pyx_t_4); - __pyx_t_4 = 0; - - /* "View.MemoryView":146 - * raise ValueError, "itemsize <= 0 for cython.array" - * - * if not isinstance(format, bytes): # <<<<<<<<<<<<<< - * format = format.encode('ASCII') - * self._format = format # keep a reference to the byte string - */ - } - - /* "View.MemoryView":148 - * if not isinstance(format, bytes): - * format = format.encode('ASCII') - * self._format = format # keep a reference to the byte string # <<<<<<<<<<<<<< - * self.format = self._format - * - */ - if (!(likely(PyBytes_CheckExact(__pyx_v_format))||((__pyx_v_format) == Py_None) || __Pyx_RaiseUnexpectedTypeError("bytes", __pyx_v_format))) __PYX_ERR(1, 148, __pyx_L1_error) - __pyx_t_4 = __pyx_v_format; - __Pyx_INCREF(__pyx_t_4); - __Pyx_GIVEREF(__pyx_t_4); - __Pyx_GOTREF(__pyx_v_self->_format); - __Pyx_DECREF(__pyx_v_self->_format); - __pyx_v_self->_format = ((PyObject*)__pyx_t_4); - __pyx_t_4 = 0; - - /* "View.MemoryView":149 - * format = format.encode('ASCII') - * self._format = format # keep a reference to the byte string - * self.format = self._format # <<<<<<<<<<<<<< - * - * - */ - if (unlikely(__pyx_v_self->_format == Py_None)) { - PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found"); - __PYX_ERR(1, 149, __pyx_L1_error) - } - __pyx_t_8 = __Pyx_PyBytes_AsWritableString(__pyx_v_self->_format); if (unlikely((!__pyx_t_8) && PyErr_Occurred())) __PYX_ERR(1, 149, __pyx_L1_error) - __pyx_v_self->format = __pyx_t_8; - - /* "View.MemoryView":152 - * - * - * self._shape = PyObject_Malloc(sizeof(Py_ssize_t)*self.ndim*2) # <<<<<<<<<<<<<< - * self._strides = self._shape + self.ndim - * - */ - __pyx_v_self->_shape = ((Py_ssize_t *)PyObject_Malloc((((sizeof(Py_ssize_t)) * __pyx_v_self->ndim) * 2))); - - /* "View.MemoryView":153 - * - * self._shape = PyObject_Malloc(sizeof(Py_ssize_t)*self.ndim*2) - * self._strides = self._shape + self.ndim # <<<<<<<<<<<<<< - * - * if not self._shape: - */ - __pyx_v_self->_strides = (__pyx_v_self->_shape + __pyx_v_self->ndim); - - /* "View.MemoryView":155 - * self._strides = self._shape + self.ndim - * - * if not self._shape: # <<<<<<<<<<<<<< - * raise MemoryError, "unable to allocate shape and strides." - * - */ - __pyx_t_3 = (!(__pyx_v_self->_shape != 0)); - if (unlikely(__pyx_t_3)) { - - /* "View.MemoryView":156 - * - * if not self._shape: - * raise MemoryError, "unable to allocate shape and strides." # <<<<<<<<<<<<<< - * - * - */ - __Pyx_Raise(__pyx_builtin_MemoryError, __pyx_kp_s_unable_to_allocate_shape_and_str, 0, 0); - __PYX_ERR(1, 156, __pyx_L1_error) - - /* "View.MemoryView":155 - * self._strides = self._shape + self.ndim - * - * if not self._shape: # <<<<<<<<<<<<<< - * raise MemoryError, "unable to allocate shape and strides." - * - */ - } - - /* "View.MemoryView":159 - * - * - * for idx, dim in enumerate(shape): # <<<<<<<<<<<<<< - * if dim <= 0: - * raise ValueError, f"Invalid shape in axis {idx}: {dim}." - */ - __pyx_t_7 = 0; - __pyx_t_4 = __pyx_v_shape; __Pyx_INCREF(__pyx_t_4); __pyx_t_1 = 0; - for (;;) { - if (__pyx_t_1 >= PyTuple_GET_SIZE(__pyx_t_4)) break; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_1); __Pyx_INCREF(__pyx_t_5); __pyx_t_1++; if (unlikely((0 < 0))) __PYX_ERR(1, 159, __pyx_L1_error) - #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_4, __pyx_t_1); __pyx_t_1++; if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 159, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - #endif - __pyx_t_9 = __Pyx_PyIndex_AsSsize_t(__pyx_t_5); if (unlikely((__pyx_t_9 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 159, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_v_dim = __pyx_t_9; - __pyx_v_idx = __pyx_t_7; - __pyx_t_7 = (__pyx_t_7 + 1); - - /* "View.MemoryView":160 - * - * for idx, dim in enumerate(shape): - * if dim <= 0: # <<<<<<<<<<<<<< - * raise ValueError, f"Invalid shape in axis {idx}: {dim}." - * self._shape[idx] = dim - */ - __pyx_t_3 = (__pyx_v_dim <= 0); - if (unlikely(__pyx_t_3)) { - - /* "View.MemoryView":161 - * for idx, dim in enumerate(shape): - * if dim <= 0: - * raise ValueError, f"Invalid shape in axis {idx}: {dim}." # <<<<<<<<<<<<<< - * self._shape[idx] = dim - * - */ - __pyx_t_5 = PyTuple_New(5); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 161, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_9 = 0; - __pyx_t_10 = 127; - __Pyx_INCREF(__pyx_kp_u_Invalid_shape_in_axis); - __pyx_t_9 += 22; - __Pyx_GIVEREF(__pyx_kp_u_Invalid_shape_in_axis); - PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_kp_u_Invalid_shape_in_axis); - __pyx_t_6 = __Pyx_PyUnicode_From_int(__pyx_v_idx, 0, ' ', 'd'); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 161, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_9 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_6); - __Pyx_GIVEREF(__pyx_t_6); - PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_6); - __pyx_t_6 = 0; - __Pyx_INCREF(__pyx_kp_u_); - __pyx_t_9 += 2; - __Pyx_GIVEREF(__pyx_kp_u_); - PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_kp_u_); - __pyx_t_6 = __Pyx_PyUnicode_From_Py_ssize_t(__pyx_v_dim, 0, ' ', 'd'); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 161, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_9 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_6); - __Pyx_GIVEREF(__pyx_t_6); - PyTuple_SET_ITEM(__pyx_t_5, 3, __pyx_t_6); - __pyx_t_6 = 0; - __Pyx_INCREF(__pyx_kp_u__2); - __pyx_t_9 += 1; - __Pyx_GIVEREF(__pyx_kp_u__2); - PyTuple_SET_ITEM(__pyx_t_5, 4, __pyx_kp_u__2); - __pyx_t_6 = __Pyx_PyUnicode_Join(__pyx_t_5, 5, __pyx_t_9, __pyx_t_10); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 161, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __Pyx_Raise(__pyx_builtin_ValueError, __pyx_t_6, 0, 0); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __PYX_ERR(1, 161, __pyx_L1_error) - - /* "View.MemoryView":160 - * - * for idx, dim in enumerate(shape): - * if dim <= 0: # <<<<<<<<<<<<<< - * raise ValueError, f"Invalid shape in axis {idx}: {dim}." - * self._shape[idx] = dim - */ - } - - /* "View.MemoryView":162 - * if dim <= 0: - * raise ValueError, f"Invalid shape in axis {idx}: {dim}." - * self._shape[idx] = dim # <<<<<<<<<<<<<< - * - * cdef char order - */ - (__pyx_v_self->_shape[__pyx_v_idx]) = __pyx_v_dim; - - /* "View.MemoryView":159 - * - * - * for idx, dim in enumerate(shape): # <<<<<<<<<<<<<< - * if dim <= 0: - * raise ValueError, f"Invalid shape in axis {idx}: {dim}." - */ - } - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - - /* "View.MemoryView":165 - * - * cdef char order - * if mode == 'c': # <<<<<<<<<<<<<< - * order = b'C' - * self.mode = u'c' - */ - __pyx_t_3 = (__Pyx_PyString_Equals(__pyx_v_mode, __pyx_n_s_c, Py_EQ)); if (unlikely((__pyx_t_3 < 0))) __PYX_ERR(1, 165, __pyx_L1_error) - if (__pyx_t_3) { - - /* "View.MemoryView":166 - * cdef char order - * if mode == 'c': - * order = b'C' # <<<<<<<<<<<<<< - * self.mode = u'c' - * elif mode == 'fortran': - */ - __pyx_v_order = 'C'; - - /* "View.MemoryView":167 - * if mode == 'c': - * order = b'C' - * self.mode = u'c' # <<<<<<<<<<<<<< - * elif mode == 'fortran': - * order = b'F' - */ - __Pyx_INCREF(__pyx_n_u_c); - __Pyx_GIVEREF(__pyx_n_u_c); - __Pyx_GOTREF(__pyx_v_self->mode); - __Pyx_DECREF(__pyx_v_self->mode); - __pyx_v_self->mode = __pyx_n_u_c; - - /* "View.MemoryView":165 - * - * cdef char order - * if mode == 'c': # <<<<<<<<<<<<<< - * order = b'C' - * self.mode = u'c' - */ - goto __pyx_L11; - } - - /* "View.MemoryView":168 - * order = b'C' - * self.mode = u'c' - * elif mode == 'fortran': # <<<<<<<<<<<<<< - * order = b'F' - * self.mode = u'fortran' - */ - __pyx_t_3 = (__Pyx_PyString_Equals(__pyx_v_mode, __pyx_n_s_fortran, Py_EQ)); if (unlikely((__pyx_t_3 < 0))) __PYX_ERR(1, 168, __pyx_L1_error) - if (likely(__pyx_t_3)) { - - /* "View.MemoryView":169 - * self.mode = u'c' - * elif mode == 'fortran': - * order = b'F' # <<<<<<<<<<<<<< - * self.mode = u'fortran' - * else: - */ - __pyx_v_order = 'F'; - - /* "View.MemoryView":170 - * elif mode == 'fortran': - * order = b'F' - * self.mode = u'fortran' # <<<<<<<<<<<<<< - * else: - * raise ValueError, f"Invalid mode, expected 'c' or 'fortran', got {mode}" - */ - __Pyx_INCREF(__pyx_n_u_fortran); - __Pyx_GIVEREF(__pyx_n_u_fortran); - __Pyx_GOTREF(__pyx_v_self->mode); - __Pyx_DECREF(__pyx_v_self->mode); - __pyx_v_self->mode = __pyx_n_u_fortran; - - /* "View.MemoryView":168 - * order = b'C' - * self.mode = u'c' - * elif mode == 'fortran': # <<<<<<<<<<<<<< - * order = b'F' - * self.mode = u'fortran' - */ - goto __pyx_L11; - } - - /* "View.MemoryView":172 - * self.mode = u'fortran' - * else: - * raise ValueError, f"Invalid mode, expected 'c' or 'fortran', got {mode}" # <<<<<<<<<<<<<< - * - * self.len = fill_contig_strides_array(self._shape, self._strides, itemsize, self.ndim, order) - */ - /*else*/ { - __pyx_t_4 = __Pyx_PyObject_FormatSimple(__pyx_v_mode, __pyx_empty_unicode); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 172, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_6 = __Pyx_PyUnicode_Concat(__pyx_kp_u_Invalid_mode_expected_c_or_fortr, __pyx_t_4); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 172, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_Raise(__pyx_builtin_ValueError, __pyx_t_6, 0, 0); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __PYX_ERR(1, 172, __pyx_L1_error) - } - __pyx_L11:; - - /* "View.MemoryView":174 - * raise ValueError, f"Invalid mode, expected 'c' or 'fortran', got {mode}" - * - * self.len = fill_contig_strides_array(self._shape, self._strides, itemsize, self.ndim, order) # <<<<<<<<<<<<<< - * - * self.free_data = allocate_buffer - */ - __pyx_v_self->len = __pyx_fill_contig_strides_array(__pyx_v_self->_shape, __pyx_v_self->_strides, __pyx_v_itemsize, __pyx_v_self->ndim, __pyx_v_order); - - /* "View.MemoryView":176 - * self.len = fill_contig_strides_array(self._shape, self._strides, itemsize, self.ndim, order) - * - * self.free_data = allocate_buffer # <<<<<<<<<<<<<< - * self.dtype_is_object = format == b'O' - * - */ - __pyx_v_self->free_data = __pyx_v_allocate_buffer; - - /* "View.MemoryView":177 - * - * self.free_data = allocate_buffer - * self.dtype_is_object = format == b'O' # <<<<<<<<<<<<<< - * - * if allocate_buffer: - */ - __pyx_t_6 = PyObject_RichCompare(__pyx_v_format, __pyx_n_b_O, Py_EQ); __Pyx_XGOTREF(__pyx_t_6); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 177, __pyx_L1_error) - __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_6); if (unlikely((__pyx_t_3 == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 177, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_v_self->dtype_is_object = __pyx_t_3; - - /* "View.MemoryView":179 - * self.dtype_is_object = format == b'O' - * - * if allocate_buffer: # <<<<<<<<<<<<<< - * _allocate_buffer(self) - * - */ - if (__pyx_v_allocate_buffer) { - - /* "View.MemoryView":180 - * - * if allocate_buffer: - * _allocate_buffer(self) # <<<<<<<<<<<<<< - * - * @cname('getbuffer') - */ - __pyx_t_7 = __pyx_array_allocate_buffer(__pyx_v_self); if (unlikely(__pyx_t_7 == ((int)-1))) __PYX_ERR(1, 180, __pyx_L1_error) - - /* "View.MemoryView":179 - * self.dtype_is_object = format == b'O' - * - * if allocate_buffer: # <<<<<<<<<<<<<< - * _allocate_buffer(self) - * - */ - } - - /* "View.MemoryView":131 - * cdef bint dtype_is_object - * - * def __cinit__(array self, tuple shape, Py_ssize_t itemsize, format not None, # <<<<<<<<<<<<<< - * mode="c", bint allocate_buffer=True): - * - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_5); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_AddTraceback("View.MemoryView.array.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_format); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":182 - * _allocate_buffer(self) - * - * @cname('getbuffer') # <<<<<<<<<<<<<< - * def __getbuffer__(self, Py_buffer *info, int flags): - * cdef int bufmode = -1 - */ - -/* Python wrapper */ -CYTHON_UNUSED static int __pyx_array_getbuffer(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/ -CYTHON_UNUSED static int __pyx_array_getbuffer(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__getbuffer__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(((struct __pyx_array_obj *)__pyx_v_self), ((Py_buffer *)__pyx_v_info), ((int)__pyx_v_flags)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(struct __pyx_array_obj *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) { - int __pyx_v_bufmode; - int __pyx_r; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - char *__pyx_t_2; - Py_ssize_t __pyx_t_3; - int __pyx_t_4; - Py_ssize_t *__pyx_t_5; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - if (unlikely(__pyx_v_info == NULL)) { - PyErr_SetString(PyExc_BufferError, "PyObject_GetBuffer: view==NULL argument is obsolete"); - return -1; - } - __Pyx_RefNannySetupContext("__getbuffer__", 0); - __pyx_v_info->obj = Py_None; __Pyx_INCREF(Py_None); - __Pyx_GIVEREF(__pyx_v_info->obj); - - /* "View.MemoryView":184 - * @cname('getbuffer') - * def __getbuffer__(self, Py_buffer *info, int flags): - * cdef int bufmode = -1 # <<<<<<<<<<<<<< - * if flags & (PyBUF_C_CONTIGUOUS | PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS): - * if self.mode == u"c": - */ - __pyx_v_bufmode = -1; - - /* "View.MemoryView":185 - * def __getbuffer__(self, Py_buffer *info, int flags): - * cdef int bufmode = -1 - * if flags & (PyBUF_C_CONTIGUOUS | PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS): # <<<<<<<<<<<<<< - * if self.mode == u"c": - * bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS - */ - __pyx_t_1 = ((__pyx_v_flags & ((PyBUF_C_CONTIGUOUS | PyBUF_F_CONTIGUOUS) | PyBUF_ANY_CONTIGUOUS)) != 0); - if (__pyx_t_1) { - - /* "View.MemoryView":186 - * cdef int bufmode = -1 - * if flags & (PyBUF_C_CONTIGUOUS | PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS): - * if self.mode == u"c": # <<<<<<<<<<<<<< - * bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS - * elif self.mode == u"fortran": - */ - __pyx_t_1 = (__Pyx_PyUnicode_Equals(__pyx_v_self->mode, __pyx_n_u_c, Py_EQ)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(1, 186, __pyx_L1_error) - if (__pyx_t_1) { - - /* "View.MemoryView":187 - * if flags & (PyBUF_C_CONTIGUOUS | PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS): - * if self.mode == u"c": - * bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS # <<<<<<<<<<<<<< - * elif self.mode == u"fortran": - * bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS - */ - __pyx_v_bufmode = (PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS); - - /* "View.MemoryView":186 - * cdef int bufmode = -1 - * if flags & (PyBUF_C_CONTIGUOUS | PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS): - * if self.mode == u"c": # <<<<<<<<<<<<<< - * bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS - * elif self.mode == u"fortran": - */ - goto __pyx_L4; - } - - /* "View.MemoryView":188 - * if self.mode == u"c": - * bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS - * elif self.mode == u"fortran": # <<<<<<<<<<<<<< - * bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS - * if not (flags & bufmode): - */ - __pyx_t_1 = (__Pyx_PyUnicode_Equals(__pyx_v_self->mode, __pyx_n_u_fortran, Py_EQ)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(1, 188, __pyx_L1_error) - if (__pyx_t_1) { - - /* "View.MemoryView":189 - * bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS - * elif self.mode == u"fortran": - * bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS # <<<<<<<<<<<<<< - * if not (flags & bufmode): - * raise ValueError, "Can only create a buffer that is contiguous in memory." - */ - __pyx_v_bufmode = (PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS); - - /* "View.MemoryView":188 - * if self.mode == u"c": - * bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS - * elif self.mode == u"fortran": # <<<<<<<<<<<<<< - * bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS - * if not (flags & bufmode): - */ - } - __pyx_L4:; - - /* "View.MemoryView":190 - * elif self.mode == u"fortran": - * bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS - * if not (flags & bufmode): # <<<<<<<<<<<<<< - * raise ValueError, "Can only create a buffer that is contiguous in memory." - * info.buf = self.data - */ - __pyx_t_1 = (!((__pyx_v_flags & __pyx_v_bufmode) != 0)); - if (unlikely(__pyx_t_1)) { - - /* "View.MemoryView":191 - * bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS - * if not (flags & bufmode): - * raise ValueError, "Can only create a buffer that is contiguous in memory." # <<<<<<<<<<<<<< - * info.buf = self.data - * info.len = self.len - */ - __Pyx_Raise(__pyx_builtin_ValueError, __pyx_kp_s_Can_only_create_a_buffer_that_is, 0, 0); - __PYX_ERR(1, 191, __pyx_L1_error) - - /* "View.MemoryView":190 - * elif self.mode == u"fortran": - * bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS - * if not (flags & bufmode): # <<<<<<<<<<<<<< - * raise ValueError, "Can only create a buffer that is contiguous in memory." - * info.buf = self.data - */ - } - - /* "View.MemoryView":185 - * def __getbuffer__(self, Py_buffer *info, int flags): - * cdef int bufmode = -1 - * if flags & (PyBUF_C_CONTIGUOUS | PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS): # <<<<<<<<<<<<<< - * if self.mode == u"c": - * bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS - */ - } - - /* "View.MemoryView":192 - * if not (flags & bufmode): - * raise ValueError, "Can only create a buffer that is contiguous in memory." - * info.buf = self.data # <<<<<<<<<<<<<< - * info.len = self.len - * - */ - __pyx_t_2 = __pyx_v_self->data; - __pyx_v_info->buf = __pyx_t_2; - - /* "View.MemoryView":193 - * raise ValueError, "Can only create a buffer that is contiguous in memory." - * info.buf = self.data - * info.len = self.len # <<<<<<<<<<<<<< - * - * if flags & PyBUF_STRIDES: - */ - __pyx_t_3 = __pyx_v_self->len; - __pyx_v_info->len = __pyx_t_3; - - /* "View.MemoryView":195 - * info.len = self.len - * - * if flags & PyBUF_STRIDES: # <<<<<<<<<<<<<< - * info.ndim = self.ndim - * info.shape = self._shape - */ - __pyx_t_1 = ((__pyx_v_flags & PyBUF_STRIDES) != 0); - if (__pyx_t_1) { - - /* "View.MemoryView":196 - * - * if flags & PyBUF_STRIDES: - * info.ndim = self.ndim # <<<<<<<<<<<<<< - * info.shape = self._shape - * info.strides = self._strides - */ - __pyx_t_4 = __pyx_v_self->ndim; - __pyx_v_info->ndim = __pyx_t_4; - - /* "View.MemoryView":197 - * if flags & PyBUF_STRIDES: - * info.ndim = self.ndim - * info.shape = self._shape # <<<<<<<<<<<<<< - * info.strides = self._strides - * else: - */ - __pyx_t_5 = __pyx_v_self->_shape; - __pyx_v_info->shape = __pyx_t_5; - - /* "View.MemoryView":198 - * info.ndim = self.ndim - * info.shape = self._shape - * info.strides = self._strides # <<<<<<<<<<<<<< - * else: - * info.ndim = 1 - */ - __pyx_t_5 = __pyx_v_self->_strides; - __pyx_v_info->strides = __pyx_t_5; - - /* "View.MemoryView":195 - * info.len = self.len - * - * if flags & PyBUF_STRIDES: # <<<<<<<<<<<<<< - * info.ndim = self.ndim - * info.shape = self._shape - */ - goto __pyx_L6; - } - - /* "View.MemoryView":200 - * info.strides = self._strides - * else: - * info.ndim = 1 # <<<<<<<<<<<<<< - * info.shape = &self.len if flags & PyBUF_ND else NULL - * info.strides = NULL - */ - /*else*/ { - __pyx_v_info->ndim = 1; - - /* "View.MemoryView":201 - * else: - * info.ndim = 1 - * info.shape = &self.len if flags & PyBUF_ND else NULL # <<<<<<<<<<<<<< - * info.strides = NULL - * - */ - __pyx_t_1 = ((__pyx_v_flags & PyBUF_ND) != 0); - if (__pyx_t_1) { - __pyx_t_5 = (&__pyx_v_self->len); - } else { - __pyx_t_5 = NULL; - } - __pyx_v_info->shape = __pyx_t_5; - - /* "View.MemoryView":202 - * info.ndim = 1 - * info.shape = &self.len if flags & PyBUF_ND else NULL - * info.strides = NULL # <<<<<<<<<<<<<< - * - * info.suboffsets = NULL - */ - __pyx_v_info->strides = NULL; - } - __pyx_L6:; - - /* "View.MemoryView":204 - * info.strides = NULL - * - * info.suboffsets = NULL # <<<<<<<<<<<<<< - * info.itemsize = self.itemsize - * info.readonly = 0 - */ - __pyx_v_info->suboffsets = NULL; - - /* "View.MemoryView":205 - * - * info.suboffsets = NULL - * info.itemsize = self.itemsize # <<<<<<<<<<<<<< - * info.readonly = 0 - * info.format = self.format if flags & PyBUF_FORMAT else NULL - */ - __pyx_t_3 = __pyx_v_self->itemsize; - __pyx_v_info->itemsize = __pyx_t_3; - - /* "View.MemoryView":206 - * info.suboffsets = NULL - * info.itemsize = self.itemsize - * info.readonly = 0 # <<<<<<<<<<<<<< - * info.format = self.format if flags & PyBUF_FORMAT else NULL - * info.obj = self - */ - __pyx_v_info->readonly = 0; - - /* "View.MemoryView":207 - * info.itemsize = self.itemsize - * info.readonly = 0 - * info.format = self.format if flags & PyBUF_FORMAT else NULL # <<<<<<<<<<<<<< - * info.obj = self - * - */ - __pyx_t_1 = ((__pyx_v_flags & PyBUF_FORMAT) != 0); - if (__pyx_t_1) { - __pyx_t_2 = __pyx_v_self->format; - } else { - __pyx_t_2 = NULL; - } - __pyx_v_info->format = __pyx_t_2; - - /* "View.MemoryView":208 - * info.readonly = 0 - * info.format = self.format if flags & PyBUF_FORMAT else NULL - * info.obj = self # <<<<<<<<<<<<<< - * - * def __dealloc__(array self): - */ - __Pyx_INCREF((PyObject *)__pyx_v_self); - __Pyx_GIVEREF((PyObject *)__pyx_v_self); - __Pyx_GOTREF(__pyx_v_info->obj); - __Pyx_DECREF(__pyx_v_info->obj); - __pyx_v_info->obj = ((PyObject *)__pyx_v_self); - - /* "View.MemoryView":182 - * _allocate_buffer(self) - * - * @cname('getbuffer') # <<<<<<<<<<<<<< - * def __getbuffer__(self, Py_buffer *info, int flags): - * cdef int bufmode = -1 - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("View.MemoryView.array.__getbuffer__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - if (__pyx_v_info->obj != NULL) { - __Pyx_GOTREF(__pyx_v_info->obj); - __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = 0; - } - goto __pyx_L2; - __pyx_L0:; - if (__pyx_v_info->obj == Py_None) { - __Pyx_GOTREF(__pyx_v_info->obj); - __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = 0; - } - __pyx_L2:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":210 - * info.obj = self - * - * def __dealloc__(array self): # <<<<<<<<<<<<<< - * if self.callback_free_data != NULL: - * self.callback_free_data(self.data) - */ - -/* Python wrapper */ -static void __pyx_array___dealloc__(PyObject *__pyx_v_self); /*proto*/ -static void __pyx_array___dealloc__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__dealloc__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(((struct __pyx_array_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); -} - -static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struct __pyx_array_obj *__pyx_v_self) { - __Pyx_RefNannyDeclarations - int __pyx_t_1; - int __pyx_t_2; - __Pyx_RefNannySetupContext("__dealloc__", 0); - - /* "View.MemoryView":211 - * - * def __dealloc__(array self): - * if self.callback_free_data != NULL: # <<<<<<<<<<<<<< - * self.callback_free_data(self.data) - * elif self.free_data and self.data is not NULL: - */ - __pyx_t_1 = (__pyx_v_self->callback_free_data != NULL); - if (__pyx_t_1) { - - /* "View.MemoryView":212 - * def __dealloc__(array self): - * if self.callback_free_data != NULL: - * self.callback_free_data(self.data) # <<<<<<<<<<<<<< - * elif self.free_data and self.data is not NULL: - * if self.dtype_is_object: - */ - __pyx_v_self->callback_free_data(__pyx_v_self->data); - - /* "View.MemoryView":211 - * - * def __dealloc__(array self): - * if self.callback_free_data != NULL: # <<<<<<<<<<<<<< - * self.callback_free_data(self.data) - * elif self.free_data and self.data is not NULL: - */ - goto __pyx_L3; - } - - /* "View.MemoryView":213 - * if self.callback_free_data != NULL: - * self.callback_free_data(self.data) - * elif self.free_data and self.data is not NULL: # <<<<<<<<<<<<<< - * if self.dtype_is_object: - * refcount_objects_in_slice(self.data, self._shape, self._strides, self.ndim, inc=False) - */ - if (__pyx_v_self->free_data) { - } else { - __pyx_t_1 = __pyx_v_self->free_data; - goto __pyx_L4_bool_binop_done; - } - __pyx_t_2 = (__pyx_v_self->data != NULL); - __pyx_t_1 = __pyx_t_2; - __pyx_L4_bool_binop_done:; - if (__pyx_t_1) { - - /* "View.MemoryView":214 - * self.callback_free_data(self.data) - * elif self.free_data and self.data is not NULL: - * if self.dtype_is_object: # <<<<<<<<<<<<<< - * refcount_objects_in_slice(self.data, self._shape, self._strides, self.ndim, inc=False) - * free(self.data) - */ - if (__pyx_v_self->dtype_is_object) { - - /* "View.MemoryView":215 - * elif self.free_data and self.data is not NULL: - * if self.dtype_is_object: - * refcount_objects_in_slice(self.data, self._shape, self._strides, self.ndim, inc=False) # <<<<<<<<<<<<<< - * free(self.data) - * PyObject_Free(self._shape) - */ - __pyx_memoryview_refcount_objects_in_slice(__pyx_v_self->data, __pyx_v_self->_shape, __pyx_v_self->_strides, __pyx_v_self->ndim, 0); - - /* "View.MemoryView":214 - * self.callback_free_data(self.data) - * elif self.free_data and self.data is not NULL: - * if self.dtype_is_object: # <<<<<<<<<<<<<< - * refcount_objects_in_slice(self.data, self._shape, self._strides, self.ndim, inc=False) - * free(self.data) - */ - } - - /* "View.MemoryView":216 - * if self.dtype_is_object: - * refcount_objects_in_slice(self.data, self._shape, self._strides, self.ndim, inc=False) - * free(self.data) # <<<<<<<<<<<<<< - * PyObject_Free(self._shape) - * - */ - free(__pyx_v_self->data); - - /* "View.MemoryView":213 - * if self.callback_free_data != NULL: - * self.callback_free_data(self.data) - * elif self.free_data and self.data is not NULL: # <<<<<<<<<<<<<< - * if self.dtype_is_object: - * refcount_objects_in_slice(self.data, self._shape, self._strides, self.ndim, inc=False) - */ - } - __pyx_L3:; - - /* "View.MemoryView":217 - * refcount_objects_in_slice(self.data, self._shape, self._strides, self.ndim, inc=False) - * free(self.data) - * PyObject_Free(self._shape) # <<<<<<<<<<<<<< - * - * @property - */ - PyObject_Free(__pyx_v_self->_shape); - - /* "View.MemoryView":210 - * info.obj = self - * - * def __dealloc__(array self): # <<<<<<<<<<<<<< - * if self.callback_free_data != NULL: - * self.callback_free_data(self.data) - */ - - /* function exit code */ - __Pyx_RefNannyFinishContext(); -} - -/* "View.MemoryView":219 - * PyObject_Free(self._shape) - * - * @property # <<<<<<<<<<<<<< - * def memview(self): - * return self.get_memview() - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_15View_dot_MemoryView_5array_7memview_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_15View_dot_MemoryView_5array_7memview_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_15View_dot_MemoryView_5array_7memview___get__(((struct __pyx_array_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_15View_dot_MemoryView_5array_7memview___get__(struct __pyx_array_obj *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 0); - - /* "View.MemoryView":221 - * @property - * def memview(self): - * return self.get_memview() # <<<<<<<<<<<<<< - * - * @cname('get_memview') - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = ((struct __pyx_vtabstruct_array *)__pyx_v_self->__pyx_vtab)->get_memview(__pyx_v_self); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 221, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "View.MemoryView":219 - * PyObject_Free(self._shape) - * - * @property # <<<<<<<<<<<<<< - * def memview(self): - * return self.get_memview() - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("View.MemoryView.array.memview.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":224 - * - * @cname('get_memview') - * cdef get_memview(self): # <<<<<<<<<<<<<< - * flags = PyBUF_ANY_CONTIGUOUS|PyBUF_FORMAT|PyBUF_WRITABLE - * return memoryview(self, flags, self.dtype_is_object) - */ - -static PyObject *__pyx_array_get_memview(struct __pyx_array_obj *__pyx_v_self) { - int __pyx_v_flags; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("get_memview", 0); - - /* "View.MemoryView":225 - * @cname('get_memview') - * cdef get_memview(self): - * flags = PyBUF_ANY_CONTIGUOUS|PyBUF_FORMAT|PyBUF_WRITABLE # <<<<<<<<<<<<<< - * return memoryview(self, flags, self.dtype_is_object) - * - */ - __pyx_v_flags = ((PyBUF_ANY_CONTIGUOUS | PyBUF_FORMAT) | PyBUF_WRITABLE); - - /* "View.MemoryView":226 - * cdef get_memview(self): - * flags = PyBUF_ANY_CONTIGUOUS|PyBUF_FORMAT|PyBUF_WRITABLE - * return memoryview(self, flags, self.dtype_is_object) # <<<<<<<<<<<<<< - * - * def __len__(self): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_flags); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 226, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_v_self->dtype_is_object); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 226, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 226, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_INCREF((PyObject *)__pyx_v_self); - __Pyx_GIVEREF((PyObject *)__pyx_v_self); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_v_self))) __PYX_ERR(1, 226, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_1); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_1)) __PYX_ERR(1, 226, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_2); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_t_2)) __PYX_ERR(1, 226, __pyx_L1_error); - __pyx_t_1 = 0; - __pyx_t_2 = 0; - __pyx_t_2 = __Pyx_PyObject_Call(((PyObject *)__pyx_memoryview_type), __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 226, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "View.MemoryView":224 - * - * @cname('get_memview') - * cdef get_memview(self): # <<<<<<<<<<<<<< - * flags = PyBUF_ANY_CONTIGUOUS|PyBUF_FORMAT|PyBUF_WRITABLE - * return memoryview(self, flags, self.dtype_is_object) - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("View.MemoryView.array.get_memview", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":228 - * return memoryview(self, flags, self.dtype_is_object) - * - * def __len__(self): # <<<<<<<<<<<<<< - * return self._shape[0] - * - */ - -/* Python wrapper */ -static Py_ssize_t __pyx_array___len__(PyObject *__pyx_v_self); /*proto*/ -static Py_ssize_t __pyx_array___len__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - Py_ssize_t __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__len__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_array___pyx_pf_15View_dot_MemoryView_5array_6__len__(((struct __pyx_array_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static Py_ssize_t __pyx_array___pyx_pf_15View_dot_MemoryView_5array_6__len__(struct __pyx_array_obj *__pyx_v_self) { - Py_ssize_t __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__len__", 0); - - /* "View.MemoryView":229 - * - * def __len__(self): - * return self._shape[0] # <<<<<<<<<<<<<< - * - * def __getattr__(self, attr): - */ - __pyx_r = (__pyx_v_self->_shape[0]); - goto __pyx_L0; - - /* "View.MemoryView":228 - * return memoryview(self, flags, self.dtype_is_object) - * - * def __len__(self): # <<<<<<<<<<<<<< - * return self._shape[0] - * - */ - - /* function exit code */ - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":231 - * return self._shape[0] - * - * def __getattr__(self, attr): # <<<<<<<<<<<<<< - * return getattr(self.memview, attr) - * - */ - -/* Python wrapper */ -static PyObject *__pyx_array___getattr__(PyObject *__pyx_v_self, PyObject *__pyx_v_attr); /*proto*/ -static PyObject *__pyx_array___getattr__(PyObject *__pyx_v_self, PyObject *__pyx_v_attr) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__getattr__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_array___pyx_pf_15View_dot_MemoryView_5array_8__getattr__(((struct __pyx_array_obj *)__pyx_v_self), ((PyObject *)__pyx_v_attr)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_array___pyx_pf_15View_dot_MemoryView_5array_8__getattr__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_attr) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__getattr__", 0); - - /* "View.MemoryView":232 - * - * def __getattr__(self, attr): - * return getattr(self.memview, attr) # <<<<<<<<<<<<<< - * - * def __getitem__(self, item): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_memview); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 232, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_GetAttr(__pyx_t_1, __pyx_v_attr); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 232, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "View.MemoryView":231 - * return self._shape[0] - * - * def __getattr__(self, attr): # <<<<<<<<<<<<<< - * return getattr(self.memview, attr) - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("View.MemoryView.array.__getattr__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":234 - * return getattr(self.memview, attr) - * - * def __getitem__(self, item): # <<<<<<<<<<<<<< - * return self.memview[item] - * - */ - -/* Python wrapper */ -static PyObject *__pyx_array___getitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_item); /*proto*/ -static PyObject *__pyx_array___getitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_item) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__getitem__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_array___pyx_pf_15View_dot_MemoryView_5array_10__getitem__(((struct __pyx_array_obj *)__pyx_v_self), ((PyObject *)__pyx_v_item)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_array___pyx_pf_15View_dot_MemoryView_5array_10__getitem__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_item) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__getitem__", 0); - - /* "View.MemoryView":235 - * - * def __getitem__(self, item): - * return self.memview[item] # <<<<<<<<<<<<<< - * - * def __setitem__(self, item, value): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_memview); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 235, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetItem(__pyx_t_1, __pyx_v_item); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 235, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "View.MemoryView":234 - * return getattr(self.memview, attr) - * - * def __getitem__(self, item): # <<<<<<<<<<<<<< - * return self.memview[item] - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("View.MemoryView.array.__getitem__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":237 - * return self.memview[item] - * - * def __setitem__(self, item, value): # <<<<<<<<<<<<<< - * self.memview[item] = value - * - */ - -/* Python wrapper */ -static int __pyx_array___setitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_item, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_array___setitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_item, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__setitem__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_array___pyx_pf_15View_dot_MemoryView_5array_12__setitem__(((struct __pyx_array_obj *)__pyx_v_self), ((PyObject *)__pyx_v_item), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_12__setitem__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_item, PyObject *__pyx_v_value) { - int __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__setitem__", 0); - - /* "View.MemoryView":238 - * - * def __setitem__(self, item, value): - * self.memview[item] = value # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_memview); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 238, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (unlikely((PyObject_SetItem(__pyx_t_1, __pyx_v_item, __pyx_v_value) < 0))) __PYX_ERR(1, 238, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "View.MemoryView":237 - * return self.memview[item] - * - * def __setitem__(self, item, value): # <<<<<<<<<<<<<< - * self.memview[item] = value - * - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("View.MemoryView.array.__setitem__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - -/* Python wrapper */ -static PyObject *__pyx_pw___pyx_array_1__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyObject *__pyx_pw___pyx_array_1__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__reduce_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 1, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("__reduce_cython__", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__reduce_cython__", 0))) return NULL; - goto __pyx_L4_argument_unpacking_done; - goto __pyx_L3_error; - __pyx_L3_error:; - __Pyx_AddTraceback("View.MemoryView.array.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf___pyx_array___reduce_cython__(((struct __pyx_array_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf___pyx_array___reduce_cython__(CYTHON_UNUSED struct __pyx_array_obj *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__reduce_cython__", 0); - - /* "(tree fragment)":2 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<< - * def __setstate_cython__(self, __pyx_state): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); - __PYX_ERR(1, 2, __pyx_L1_error) - - /* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("View.MemoryView.array.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - -/* Python wrapper */ -static PyObject *__pyx_pw___pyx_array_3__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyObject *__pyx_pw___pyx_array_3__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - CYTHON_UNUSED PyObject *__pyx_v___pyx_state = 0; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__setstate_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 3, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_pyx_state,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pyx_state)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 3, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__setstate_cython__") < 0)) __PYX_ERR(1, 3, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 1)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - } - __pyx_v___pyx_state = values[0]; - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__setstate_cython__", 1, 1, 1, __pyx_nargs); __PYX_ERR(1, 3, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("View.MemoryView.array.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf___pyx_array_2__setstate_cython__(((struct __pyx_array_obj *)__pyx_v_self), __pyx_v___pyx_state); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf___pyx_array_2__setstate_cython__(CYTHON_UNUSED struct __pyx_array_obj *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__setstate_cython__", 0); - - /* "(tree fragment)":4 - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<< - */ - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); - __PYX_ERR(1, 4, __pyx_L1_error) - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("View.MemoryView.array.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":248 - * - * @cname("__pyx_array_allocate_buffer") - * cdef int _allocate_buffer(array self) except -1: # <<<<<<<<<<<<<< - * - * - */ - -static int __pyx_array_allocate_buffer(struct __pyx_array_obj *__pyx_v_self) { - Py_ssize_t __pyx_v_i; - PyObject **__pyx_v_p; - int __pyx_r; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - Py_ssize_t __pyx_t_2; - Py_ssize_t __pyx_t_3; - Py_ssize_t __pyx_t_4; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("_allocate_buffer", 0); - - /* "View.MemoryView":254 - * cdef PyObject **p - * - * self.free_data = True # <<<<<<<<<<<<<< - * self.data = malloc(self.len) - * if not self.data: - */ - __pyx_v_self->free_data = 1; - - /* "View.MemoryView":255 - * - * self.free_data = True - * self.data = malloc(self.len) # <<<<<<<<<<<<<< - * if not self.data: - * raise MemoryError, "unable to allocate array data." - */ - __pyx_v_self->data = ((char *)malloc(__pyx_v_self->len)); - - /* "View.MemoryView":256 - * self.free_data = True - * self.data = malloc(self.len) - * if not self.data: # <<<<<<<<<<<<<< - * raise MemoryError, "unable to allocate array data." - * - */ - __pyx_t_1 = (!(__pyx_v_self->data != 0)); - if (unlikely(__pyx_t_1)) { - - /* "View.MemoryView":257 - * self.data = malloc(self.len) - * if not self.data: - * raise MemoryError, "unable to allocate array data." # <<<<<<<<<<<<<< - * - * if self.dtype_is_object: - */ - __Pyx_Raise(__pyx_builtin_MemoryError, __pyx_kp_s_unable_to_allocate_array_data, 0, 0); - __PYX_ERR(1, 257, __pyx_L1_error) - - /* "View.MemoryView":256 - * self.free_data = True - * self.data = malloc(self.len) - * if not self.data: # <<<<<<<<<<<<<< - * raise MemoryError, "unable to allocate array data." - * - */ - } - - /* "View.MemoryView":259 - * raise MemoryError, "unable to allocate array data." - * - * if self.dtype_is_object: # <<<<<<<<<<<<<< - * p = self.data - * for i in range(self.len // self.itemsize): - */ - if (__pyx_v_self->dtype_is_object) { - - /* "View.MemoryView":260 - * - * if self.dtype_is_object: - * p = self.data # <<<<<<<<<<<<<< - * for i in range(self.len // self.itemsize): - * p[i] = Py_None - */ - __pyx_v_p = ((PyObject **)__pyx_v_self->data); - - /* "View.MemoryView":261 - * if self.dtype_is_object: - * p = self.data - * for i in range(self.len // self.itemsize): # <<<<<<<<<<<<<< - * p[i] = Py_None - * Py_INCREF(Py_None) - */ - if (unlikely(__pyx_v_self->itemsize == 0)) { - PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero"); - __PYX_ERR(1, 261, __pyx_L1_error) - } - else if (sizeof(Py_ssize_t) == sizeof(long) && (!(((Py_ssize_t)-1) > 0)) && unlikely(__pyx_v_self->itemsize == (Py_ssize_t)-1) && unlikely(__Pyx_UNARY_NEG_WOULD_OVERFLOW(__pyx_v_self->len))) { - PyErr_SetString(PyExc_OverflowError, "value too large to perform division"); - __PYX_ERR(1, 261, __pyx_L1_error) - } - __pyx_t_2 = __Pyx_div_Py_ssize_t(__pyx_v_self->len, __pyx_v_self->itemsize); - __pyx_t_3 = __pyx_t_2; - for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { - __pyx_v_i = __pyx_t_4; - - /* "View.MemoryView":262 - * p = self.data - * for i in range(self.len // self.itemsize): - * p[i] = Py_None # <<<<<<<<<<<<<< - * Py_INCREF(Py_None) - * return 0 - */ - (__pyx_v_p[__pyx_v_i]) = Py_None; - - /* "View.MemoryView":263 - * for i in range(self.len // self.itemsize): - * p[i] = Py_None - * Py_INCREF(Py_None) # <<<<<<<<<<<<<< - * return 0 - * - */ - Py_INCREF(Py_None); - } - - /* "View.MemoryView":259 - * raise MemoryError, "unable to allocate array data." - * - * if self.dtype_is_object: # <<<<<<<<<<<<<< - * p = self.data - * for i in range(self.len // self.itemsize): - */ - } - - /* "View.MemoryView":264 - * p[i] = Py_None - * Py_INCREF(Py_None) - * return 0 # <<<<<<<<<<<<<< - * - * - */ - __pyx_r = 0; - goto __pyx_L0; - - /* "View.MemoryView":248 - * - * @cname("__pyx_array_allocate_buffer") - * cdef int _allocate_buffer(array self) except -1: # <<<<<<<<<<<<<< - * - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("View.MemoryView._allocate_buffer", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":268 - * - * @cname("__pyx_array_new") - * cdef array array_cwrapper(tuple shape, Py_ssize_t itemsize, char *format, char *c_mode, char *buf): # <<<<<<<<<<<<<< - * cdef array result - * cdef str mode = "fortran" if c_mode[0] == b'f' else "c" # this often comes from a constant C string. - */ - -static struct __pyx_array_obj *__pyx_array_new(PyObject *__pyx_v_shape, Py_ssize_t __pyx_v_itemsize, char *__pyx_v_format, char *__pyx_v_c_mode, char *__pyx_v_buf) { - struct __pyx_array_obj *__pyx_v_result = 0; - PyObject *__pyx_v_mode = 0; - struct __pyx_array_obj *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("array_cwrapper", 0); - - /* "View.MemoryView":270 - * cdef array array_cwrapper(tuple shape, Py_ssize_t itemsize, char *format, char *c_mode, char *buf): - * cdef array result - * cdef str mode = "fortran" if c_mode[0] == b'f' else "c" # this often comes from a constant C string. # <<<<<<<<<<<<<< - * - * if buf is NULL: - */ - __pyx_t_2 = ((__pyx_v_c_mode[0]) == 'f'); - if (__pyx_t_2) { - __Pyx_INCREF(__pyx_n_s_fortran); - __pyx_t_1 = __pyx_n_s_fortran; - } else { - __Pyx_INCREF(__pyx_n_s_c); - __pyx_t_1 = __pyx_n_s_c; - } - __pyx_v_mode = ((PyObject*)__pyx_t_1); - __pyx_t_1 = 0; - - /* "View.MemoryView":272 - * cdef str mode = "fortran" if c_mode[0] == b'f' else "c" # this often comes from a constant C string. - * - * if buf is NULL: # <<<<<<<<<<<<<< - * result = array.__new__(array, shape, itemsize, format, mode) - * else: - */ - __pyx_t_2 = (__pyx_v_buf == NULL); - if (__pyx_t_2) { - - /* "View.MemoryView":273 - * - * if buf is NULL: - * result = array.__new__(array, shape, itemsize, format, mode) # <<<<<<<<<<<<<< - * else: - * result = array.__new__(array, shape, itemsize, format, mode, allocate_buffer=False) - */ - __pyx_t_1 = PyInt_FromSsize_t(__pyx_v_itemsize); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 273, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyBytes_FromString(__pyx_v_format); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 273, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyTuple_New(4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 273, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_INCREF(__pyx_v_shape); - __Pyx_GIVEREF(__pyx_v_shape); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_v_shape)) __PYX_ERR(1, 273, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_1); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_1)) __PYX_ERR(1, 273, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_3); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_4, 2, __pyx_t_3)) __PYX_ERR(1, 273, __pyx_L1_error); - __Pyx_INCREF(__pyx_v_mode); - __Pyx_GIVEREF(__pyx_v_mode); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_4, 3, __pyx_v_mode)) __PYX_ERR(1, 273, __pyx_L1_error); - __pyx_t_1 = 0; - __pyx_t_3 = 0; - __pyx_t_3 = ((PyObject *)__pyx_tp_new_array(((PyTypeObject *)__pyx_array_type), __pyx_t_4, NULL)); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 273, __pyx_L1_error) - __Pyx_GOTREF((PyObject *)__pyx_t_3); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_v_result = ((struct __pyx_array_obj *)__pyx_t_3); - __pyx_t_3 = 0; - - /* "View.MemoryView":272 - * cdef str mode = "fortran" if c_mode[0] == b'f' else "c" # this often comes from a constant C string. - * - * if buf is NULL: # <<<<<<<<<<<<<< - * result = array.__new__(array, shape, itemsize, format, mode) - * else: - */ - goto __pyx_L3; - } - - /* "View.MemoryView":275 - * result = array.__new__(array, shape, itemsize, format, mode) - * else: - * result = array.__new__(array, shape, itemsize, format, mode, allocate_buffer=False) # <<<<<<<<<<<<<< - * result.data = buf - * - */ - /*else*/ { - __pyx_t_3 = PyInt_FromSsize_t(__pyx_v_itemsize); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 275, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = __Pyx_PyBytes_FromString(__pyx_v_format); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 275, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_1 = PyTuple_New(4); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 275, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(__pyx_v_shape); - __Pyx_GIVEREF(__pyx_v_shape); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_shape)) __PYX_ERR(1, 275, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_3); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_t_3)) __PYX_ERR(1, 275, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_4); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 2, __pyx_t_4)) __PYX_ERR(1, 275, __pyx_L1_error); - __Pyx_INCREF(__pyx_v_mode); - __Pyx_GIVEREF(__pyx_v_mode); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 3, __pyx_v_mode)) __PYX_ERR(1, 275, __pyx_L1_error); - __pyx_t_3 = 0; - __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 275, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_t_4, __pyx_n_s_allocate_buffer, Py_False) < 0) __PYX_ERR(1, 275, __pyx_L1_error) - __pyx_t_3 = ((PyObject *)__pyx_tp_new_array(((PyTypeObject *)__pyx_array_type), __pyx_t_1, __pyx_t_4)); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 275, __pyx_L1_error) - __Pyx_GOTREF((PyObject *)__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_v_result = ((struct __pyx_array_obj *)__pyx_t_3); - __pyx_t_3 = 0; - - /* "View.MemoryView":276 - * else: - * result = array.__new__(array, shape, itemsize, format, mode, allocate_buffer=False) - * result.data = buf # <<<<<<<<<<<<<< - * - * return result - */ - __pyx_v_result->data = __pyx_v_buf; - } - __pyx_L3:; - - /* "View.MemoryView":278 - * result.data = buf - * - * return result # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF((PyObject *)__pyx_r); - __Pyx_INCREF((PyObject *)__pyx_v_result); - __pyx_r = __pyx_v_result; - goto __pyx_L0; - - /* "View.MemoryView":268 - * - * @cname("__pyx_array_new") - * cdef array array_cwrapper(tuple shape, Py_ssize_t itemsize, char *format, char *c_mode, char *buf): # <<<<<<<<<<<<<< - * cdef array result - * cdef str mode = "fortran" if c_mode[0] == b'f' else "c" # this often comes from a constant C string. - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_AddTraceback("View.MemoryView.array_cwrapper", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF((PyObject *)__pyx_v_result); - __Pyx_XDECREF(__pyx_v_mode); - __Pyx_XGIVEREF((PyObject *)__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":304 - * cdef class Enum(object): - * cdef object name - * def __init__(self, name): # <<<<<<<<<<<<<< - * self.name = name - * def __repr__(self): - */ - -/* Python wrapper */ -static int __pyx_MemviewEnum___init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static int __pyx_MemviewEnum___init__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_name = 0; - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__init__ (wrapper)", 0); - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 304, __pyx_L3_error) - #endif - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_name,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_VARARGS(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_name)) != 0)) { - (void)__Pyx_Arg_NewRef_VARARGS(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 304, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__init__") < 0)) __PYX_ERR(1, 304, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 1)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); - } - __pyx_v_name = values[0]; - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__init__", 1, 1, 1, __pyx_nargs); __PYX_ERR(1, 304, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("View.MemoryView.Enum.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return -1; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum___init__(((struct __pyx_MemviewEnum_obj *)__pyx_v_self), __pyx_v_name); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum___init__(struct __pyx_MemviewEnum_obj *__pyx_v_self, PyObject *__pyx_v_name) { - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__init__", 0); - - /* "View.MemoryView":305 - * cdef object name - * def __init__(self, name): - * self.name = name # <<<<<<<<<<<<<< - * def __repr__(self): - * return self.name - */ - __Pyx_INCREF(__pyx_v_name); - __Pyx_GIVEREF(__pyx_v_name); - __Pyx_GOTREF(__pyx_v_self->name); - __Pyx_DECREF(__pyx_v_self->name); - __pyx_v_self->name = __pyx_v_name; - - /* "View.MemoryView":304 - * cdef class Enum(object): - * cdef object name - * def __init__(self, name): # <<<<<<<<<<<<<< - * self.name = name - * def __repr__(self): - */ - - /* function exit code */ - __pyx_r = 0; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":306 - * def __init__(self, name): - * self.name = name - * def __repr__(self): # <<<<<<<<<<<<<< - * return self.name - * - */ - -/* Python wrapper */ -static PyObject *__pyx_MemviewEnum___repr__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_MemviewEnum___repr__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__repr__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum_2__repr__(((struct __pyx_MemviewEnum_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum_2__repr__(struct __pyx_MemviewEnum_obj *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__repr__", 0); - - /* "View.MemoryView":307 - * self.name = name - * def __repr__(self): - * return self.name # <<<<<<<<<<<<<< - * - * cdef generic = Enum("") - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_self->name); - __pyx_r = __pyx_v_self->name; - goto __pyx_L0; - - /* "View.MemoryView":306 - * def __init__(self, name): - * self.name = name - * def __repr__(self): # <<<<<<<<<<<<<< - * return self.name - * - */ - - /* function exit code */ - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * cdef tuple state - * cdef object _dict - */ - -/* Python wrapper */ -static PyObject *__pyx_pw___pyx_MemviewEnum_1__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyObject *__pyx_pw___pyx_MemviewEnum_1__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__reduce_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 1, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("__reduce_cython__", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__reduce_cython__", 0))) return NULL; - goto __pyx_L4_argument_unpacking_done; - goto __pyx_L3_error; - __pyx_L3_error:; - __Pyx_AddTraceback("View.MemoryView.Enum.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf___pyx_MemviewEnum___reduce_cython__(((struct __pyx_MemviewEnum_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf___pyx_MemviewEnum___reduce_cython__(struct __pyx_MemviewEnum_obj *__pyx_v_self) { - PyObject *__pyx_v_state = 0; - PyObject *__pyx_v__dict = 0; - int __pyx_v_use_setstate; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__reduce_cython__", 0); - - /* "(tree fragment)":5 - * cdef object _dict - * cdef bint use_setstate - * state = (self.name,) # <<<<<<<<<<<<<< - * _dict = getattr(self, '__dict__', None) - * if _dict is not None: - */ - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 5, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(__pyx_v_self->name); - __Pyx_GIVEREF(__pyx_v_self->name); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_self->name)) __PYX_ERR(1, 5, __pyx_L1_error); - __pyx_v_state = ((PyObject*)__pyx_t_1); - __pyx_t_1 = 0; - - /* "(tree fragment)":6 - * cdef bint use_setstate - * state = (self.name,) - * _dict = getattr(self, '__dict__', None) # <<<<<<<<<<<<<< - * if _dict is not None: - * state += (_dict,) - */ - __pyx_t_1 = __Pyx_GetAttr3(((PyObject *)__pyx_v_self), __pyx_n_s_dict, Py_None); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 6, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_v__dict = __pyx_t_1; - __pyx_t_1 = 0; - - /* "(tree fragment)":7 - * state = (self.name,) - * _dict = getattr(self, '__dict__', None) - * if _dict is not None: # <<<<<<<<<<<<<< - * state += (_dict,) - * use_setstate = True - */ - __pyx_t_2 = (__pyx_v__dict != Py_None); - if (__pyx_t_2) { - - /* "(tree fragment)":8 - * _dict = getattr(self, '__dict__', None) - * if _dict is not None: - * state += (_dict,) # <<<<<<<<<<<<<< - * use_setstate = True - * else: - */ - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 8, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(__pyx_v__dict); - __Pyx_GIVEREF(__pyx_v__dict); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v__dict)) __PYX_ERR(1, 8, __pyx_L1_error); - __pyx_t_3 = PyNumber_InPlaceAdd(__pyx_v_state, __pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 8, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF_SET(__pyx_v_state, ((PyObject*)__pyx_t_3)); - __pyx_t_3 = 0; - - /* "(tree fragment)":9 - * if _dict is not None: - * state += (_dict,) - * use_setstate = True # <<<<<<<<<<<<<< - * else: - * use_setstate = self.name is not None - */ - __pyx_v_use_setstate = 1; - - /* "(tree fragment)":7 - * state = (self.name,) - * _dict = getattr(self, '__dict__', None) - * if _dict is not None: # <<<<<<<<<<<<<< - * state += (_dict,) - * use_setstate = True - */ - goto __pyx_L3; - } - - /* "(tree fragment)":11 - * use_setstate = True - * else: - * use_setstate = self.name is not None # <<<<<<<<<<<<<< - * if use_setstate: - * return __pyx_unpickle_Enum, (type(self), 0x82a3537, None), state - */ - /*else*/ { - __pyx_t_2 = (__pyx_v_self->name != Py_None); - __pyx_v_use_setstate = __pyx_t_2; - } - __pyx_L3:; - - /* "(tree fragment)":12 - * else: - * use_setstate = self.name is not None - * if use_setstate: # <<<<<<<<<<<<<< - * return __pyx_unpickle_Enum, (type(self), 0x82a3537, None), state - * else: - */ - if (__pyx_v_use_setstate) { - - /* "(tree fragment)":13 - * use_setstate = self.name is not None - * if use_setstate: - * return __pyx_unpickle_Enum, (type(self), 0x82a3537, None), state # <<<<<<<<<<<<<< - * else: - * return __pyx_unpickle_Enum, (type(self), 0x82a3537, state) - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_pyx_unpickle_Enum); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 13, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = PyTuple_New(3); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 13, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(((PyObject *)Py_TYPE(((PyObject *)__pyx_v_self)))); - __Pyx_GIVEREF(((PyObject *)Py_TYPE(((PyObject *)__pyx_v_self)))); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)Py_TYPE(((PyObject *)__pyx_v_self))))) __PYX_ERR(1, 13, __pyx_L1_error); - __Pyx_INCREF(__pyx_int_136983863); - __Pyx_GIVEREF(__pyx_int_136983863); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_int_136983863)) __PYX_ERR(1, 13, __pyx_L1_error); - __Pyx_INCREF(Py_None); - __Pyx_GIVEREF(Py_None); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 2, Py_None)) __PYX_ERR(1, 13, __pyx_L1_error); - __pyx_t_4 = PyTuple_New(3); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 13, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_GIVEREF(__pyx_t_3); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_3)) __PYX_ERR(1, 13, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_1); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_1)) __PYX_ERR(1, 13, __pyx_L1_error); - __Pyx_INCREF(__pyx_v_state); - __Pyx_GIVEREF(__pyx_v_state); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_4, 2, __pyx_v_state)) __PYX_ERR(1, 13, __pyx_L1_error); - __pyx_t_3 = 0; - __pyx_t_1 = 0; - __pyx_r = __pyx_t_4; - __pyx_t_4 = 0; - goto __pyx_L0; - - /* "(tree fragment)":12 - * else: - * use_setstate = self.name is not None - * if use_setstate: # <<<<<<<<<<<<<< - * return __pyx_unpickle_Enum, (type(self), 0x82a3537, None), state - * else: - */ - } - - /* "(tree fragment)":15 - * return __pyx_unpickle_Enum, (type(self), 0x82a3537, None), state - * else: - * return __pyx_unpickle_Enum, (type(self), 0x82a3537, state) # <<<<<<<<<<<<<< - * def __setstate_cython__(self, __pyx_state): - * __pyx_unpickle_Enum__set_state(self, __pyx_state) - */ - /*else*/ { - __Pyx_XDECREF(__pyx_r); - __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_pyx_unpickle_Enum); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 15, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_1 = PyTuple_New(3); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 15, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(((PyObject *)Py_TYPE(((PyObject *)__pyx_v_self)))); - __Pyx_GIVEREF(((PyObject *)Py_TYPE(((PyObject *)__pyx_v_self)))); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)Py_TYPE(((PyObject *)__pyx_v_self))))) __PYX_ERR(1, 15, __pyx_L1_error); - __Pyx_INCREF(__pyx_int_136983863); - __Pyx_GIVEREF(__pyx_int_136983863); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_int_136983863)) __PYX_ERR(1, 15, __pyx_L1_error); - __Pyx_INCREF(__pyx_v_state); - __Pyx_GIVEREF(__pyx_v_state); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_1, 2, __pyx_v_state)) __PYX_ERR(1, 15, __pyx_L1_error); - __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 15, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_GIVEREF(__pyx_t_4); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_4)) __PYX_ERR(1, 15, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_1); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_1)) __PYX_ERR(1, 15, __pyx_L1_error); - __pyx_t_4 = 0; - __pyx_t_1 = 0; - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - goto __pyx_L0; - } - - /* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * cdef tuple state - * cdef object _dict - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_AddTraceback("View.MemoryView.Enum.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_state); - __Pyx_XDECREF(__pyx_v__dict); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":16 - * else: - * return __pyx_unpickle_Enum, (type(self), 0x82a3537, state) - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * __pyx_unpickle_Enum__set_state(self, __pyx_state) - */ - -/* Python wrapper */ -static PyObject *__pyx_pw___pyx_MemviewEnum_3__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyObject *__pyx_pw___pyx_MemviewEnum_3__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - PyObject *__pyx_v___pyx_state = 0; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__setstate_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 16, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_pyx_state,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pyx_state)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 16, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__setstate_cython__") < 0)) __PYX_ERR(1, 16, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 1)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - } - __pyx_v___pyx_state = values[0]; - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__setstate_cython__", 1, 1, 1, __pyx_nargs); __PYX_ERR(1, 16, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("View.MemoryView.Enum.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf___pyx_MemviewEnum_2__setstate_cython__(((struct __pyx_MemviewEnum_obj *)__pyx_v_self), __pyx_v___pyx_state); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf___pyx_MemviewEnum_2__setstate_cython__(struct __pyx_MemviewEnum_obj *__pyx_v_self, PyObject *__pyx_v___pyx_state) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__setstate_cython__", 0); - - /* "(tree fragment)":17 - * return __pyx_unpickle_Enum, (type(self), 0x82a3537, state) - * def __setstate_cython__(self, __pyx_state): - * __pyx_unpickle_Enum__set_state(self, __pyx_state) # <<<<<<<<<<<<<< - */ - if (!(likely(PyTuple_CheckExact(__pyx_v___pyx_state))||((__pyx_v___pyx_state) == Py_None) || __Pyx_RaiseUnexpectedTypeError("tuple", __pyx_v___pyx_state))) __PYX_ERR(1, 17, __pyx_L1_error) - __pyx_t_1 = __pyx_unpickle_Enum__set_state(__pyx_v_self, ((PyObject*)__pyx_v___pyx_state)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 17, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "(tree fragment)":16 - * else: - * return __pyx_unpickle_Enum, (type(self), 0x82a3537, state) - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * __pyx_unpickle_Enum__set_state(self, __pyx_state) - */ - - /* function exit code */ - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("View.MemoryView.Enum.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":349 - * cdef __Pyx_TypeInfo *typeinfo - * - * def __cinit__(memoryview self, object obj, int flags, bint dtype_is_object=False): # <<<<<<<<<<<<<< - * self.obj = obj - * self.flags = flags - */ - -/* Python wrapper */ -static int __pyx_memoryview___cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static int __pyx_memoryview___cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - PyObject *__pyx_v_obj = 0; - int __pyx_v_flags; - int __pyx_v_dtype_is_object; - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[3] = {0,0,0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0); - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 349, __pyx_L3_error) - #endif - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_obj,&__pyx_n_s_flags,&__pyx_n_s_dtype_is_object,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 3: values[2] = __Pyx_Arg_VARARGS(__pyx_args, 2); - CYTHON_FALLTHROUGH; - case 2: values[1] = __Pyx_Arg_VARARGS(__pyx_args, 1); - CYTHON_FALLTHROUGH; - case 1: values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_VARARGS(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_obj)) != 0)) { - (void)__Pyx_Arg_NewRef_VARARGS(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 349, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - CYTHON_FALLTHROUGH; - case 1: - if (likely((values[1] = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_flags)) != 0)) { - (void)__Pyx_Arg_NewRef_VARARGS(values[1]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 349, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 2, 3, 1); __PYX_ERR(1, 349, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 2: - if (kw_args > 0) { - PyObject* value = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_dtype_is_object); - if (value) { values[2] = __Pyx_Arg_NewRef_VARARGS(value); kw_args--; } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 349, __pyx_L3_error) - } - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__cinit__") < 0)) __PYX_ERR(1, 349, __pyx_L3_error) - } - } else { - switch (__pyx_nargs) { - case 3: values[2] = __Pyx_Arg_VARARGS(__pyx_args, 2); - CYTHON_FALLTHROUGH; - case 2: values[1] = __Pyx_Arg_VARARGS(__pyx_args, 1); - values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - __pyx_v_obj = values[0]; - __pyx_v_flags = __Pyx_PyInt_As_int(values[1]); if (unlikely((__pyx_v_flags == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 349, __pyx_L3_error) - if (values[2]) { - __pyx_v_dtype_is_object = __Pyx_PyObject_IsTrue(values[2]); if (unlikely((__pyx_v_dtype_is_object == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 349, __pyx_L3_error) - } else { - __pyx_v_dtype_is_object = ((int)0); - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 2, 3, __pyx_nargs); __PYX_ERR(1, 349, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("View.MemoryView.memoryview.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return -1; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit__(((struct __pyx_memoryview_obj *)__pyx_v_self), __pyx_v_obj, __pyx_v_flags, __pyx_v_dtype_is_object); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit__(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_obj, int __pyx_v_flags, int __pyx_v_dtype_is_object) { - int __pyx_r; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - Py_intptr_t __pyx_t_4; - size_t __pyx_t_5; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__cinit__", 0); - - /* "View.MemoryView":350 - * - * def __cinit__(memoryview self, object obj, int flags, bint dtype_is_object=False): - * self.obj = obj # <<<<<<<<<<<<<< - * self.flags = flags - * if type(self) is memoryview or obj is not None: - */ - __Pyx_INCREF(__pyx_v_obj); - __Pyx_GIVEREF(__pyx_v_obj); - __Pyx_GOTREF(__pyx_v_self->obj); - __Pyx_DECREF(__pyx_v_self->obj); - __pyx_v_self->obj = __pyx_v_obj; - - /* "View.MemoryView":351 - * def __cinit__(memoryview self, object obj, int flags, bint dtype_is_object=False): - * self.obj = obj - * self.flags = flags # <<<<<<<<<<<<<< - * if type(self) is memoryview or obj is not None: - * __Pyx_GetBuffer(obj, &self.view, flags) - */ - __pyx_v_self->flags = __pyx_v_flags; - - /* "View.MemoryView":352 - * self.obj = obj - * self.flags = flags - * if type(self) is memoryview or obj is not None: # <<<<<<<<<<<<<< - * __Pyx_GetBuffer(obj, &self.view, flags) - * if self.view.obj == NULL: - */ - __pyx_t_2 = (((PyObject *)Py_TYPE(((PyObject *)__pyx_v_self))) == ((PyObject *)__pyx_memoryview_type)); - if (!__pyx_t_2) { - } else { - __pyx_t_1 = __pyx_t_2; - goto __pyx_L4_bool_binop_done; - } - __pyx_t_2 = (__pyx_v_obj != Py_None); - __pyx_t_1 = __pyx_t_2; - __pyx_L4_bool_binop_done:; - if (__pyx_t_1) { - - /* "View.MemoryView":353 - * self.flags = flags - * if type(self) is memoryview or obj is not None: - * __Pyx_GetBuffer(obj, &self.view, flags) # <<<<<<<<<<<<<< - * if self.view.obj == NULL: - * (<__pyx_buffer *> &self.view).obj = Py_None - */ - __pyx_t_3 = __Pyx_GetBuffer(__pyx_v_obj, (&__pyx_v_self->view), __pyx_v_flags); if (unlikely(__pyx_t_3 == ((int)-1))) __PYX_ERR(1, 353, __pyx_L1_error) - - /* "View.MemoryView":354 - * if type(self) is memoryview or obj is not None: - * __Pyx_GetBuffer(obj, &self.view, flags) - * if self.view.obj == NULL: # <<<<<<<<<<<<<< - * (<__pyx_buffer *> &self.view).obj = Py_None - * Py_INCREF(Py_None) - */ - __pyx_t_1 = (((PyObject *)__pyx_v_self->view.obj) == NULL); - if (__pyx_t_1) { - - /* "View.MemoryView":355 - * __Pyx_GetBuffer(obj, &self.view, flags) - * if self.view.obj == NULL: - * (<__pyx_buffer *> &self.view).obj = Py_None # <<<<<<<<<<<<<< - * Py_INCREF(Py_None) - * - */ - ((Py_buffer *)(&__pyx_v_self->view))->obj = Py_None; - - /* "View.MemoryView":356 - * if self.view.obj == NULL: - * (<__pyx_buffer *> &self.view).obj = Py_None - * Py_INCREF(Py_None) # <<<<<<<<<<<<<< - * - * if not __PYX_CYTHON_ATOMICS_ENABLED(): - */ - Py_INCREF(Py_None); - - /* "View.MemoryView":354 - * if type(self) is memoryview or obj is not None: - * __Pyx_GetBuffer(obj, &self.view, flags) - * if self.view.obj == NULL: # <<<<<<<<<<<<<< - * (<__pyx_buffer *> &self.view).obj = Py_None - * Py_INCREF(Py_None) - */ - } - - /* "View.MemoryView":352 - * self.obj = obj - * self.flags = flags - * if type(self) is memoryview or obj is not None: # <<<<<<<<<<<<<< - * __Pyx_GetBuffer(obj, &self.view, flags) - * if self.view.obj == NULL: - */ - } - - /* "View.MemoryView":358 - * Py_INCREF(Py_None) - * - * if not __PYX_CYTHON_ATOMICS_ENABLED(): # <<<<<<<<<<<<<< - * global __pyx_memoryview_thread_locks_used - * if __pyx_memoryview_thread_locks_used < 8: - */ - __pyx_t_1 = (!__PYX_CYTHON_ATOMICS_ENABLED()); - if (__pyx_t_1) { - - /* "View.MemoryView":360 - * if not __PYX_CYTHON_ATOMICS_ENABLED(): - * global __pyx_memoryview_thread_locks_used - * if __pyx_memoryview_thread_locks_used < 8: # <<<<<<<<<<<<<< - * self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] - * __pyx_memoryview_thread_locks_used += 1 - */ - __pyx_t_1 = (__pyx_memoryview_thread_locks_used < 8); - if (__pyx_t_1) { - - /* "View.MemoryView":361 - * global __pyx_memoryview_thread_locks_used - * if __pyx_memoryview_thread_locks_used < 8: - * self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] # <<<<<<<<<<<<<< - * __pyx_memoryview_thread_locks_used += 1 - * if self.lock is NULL: - */ - __pyx_v_self->lock = (__pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used]); - - /* "View.MemoryView":362 - * if __pyx_memoryview_thread_locks_used < 8: - * self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] - * __pyx_memoryview_thread_locks_used += 1 # <<<<<<<<<<<<<< - * if self.lock is NULL: - * self.lock = PyThread_allocate_lock() - */ - __pyx_memoryview_thread_locks_used = (__pyx_memoryview_thread_locks_used + 1); - - /* "View.MemoryView":360 - * if not __PYX_CYTHON_ATOMICS_ENABLED(): - * global __pyx_memoryview_thread_locks_used - * if __pyx_memoryview_thread_locks_used < 8: # <<<<<<<<<<<<<< - * self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] - * __pyx_memoryview_thread_locks_used += 1 - */ - } - - /* "View.MemoryView":363 - * self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] - * __pyx_memoryview_thread_locks_used += 1 - * if self.lock is NULL: # <<<<<<<<<<<<<< - * self.lock = PyThread_allocate_lock() - * if self.lock is NULL: - */ - __pyx_t_1 = (__pyx_v_self->lock == NULL); - if (__pyx_t_1) { - - /* "View.MemoryView":364 - * __pyx_memoryview_thread_locks_used += 1 - * if self.lock is NULL: - * self.lock = PyThread_allocate_lock() # <<<<<<<<<<<<<< - * if self.lock is NULL: - * raise MemoryError - */ - __pyx_v_self->lock = PyThread_allocate_lock(); - - /* "View.MemoryView":365 - * if self.lock is NULL: - * self.lock = PyThread_allocate_lock() - * if self.lock is NULL: # <<<<<<<<<<<<<< - * raise MemoryError - * - */ - __pyx_t_1 = (__pyx_v_self->lock == NULL); - if (unlikely(__pyx_t_1)) { - - /* "View.MemoryView":366 - * self.lock = PyThread_allocate_lock() - * if self.lock is NULL: - * raise MemoryError # <<<<<<<<<<<<<< - * - * if flags & PyBUF_FORMAT: - */ - PyErr_NoMemory(); __PYX_ERR(1, 366, __pyx_L1_error) - - /* "View.MemoryView":365 - * if self.lock is NULL: - * self.lock = PyThread_allocate_lock() - * if self.lock is NULL: # <<<<<<<<<<<<<< - * raise MemoryError - * - */ - } - - /* "View.MemoryView":363 - * self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] - * __pyx_memoryview_thread_locks_used += 1 - * if self.lock is NULL: # <<<<<<<<<<<<<< - * self.lock = PyThread_allocate_lock() - * if self.lock is NULL: - */ - } - - /* "View.MemoryView":358 - * Py_INCREF(Py_None) - * - * if not __PYX_CYTHON_ATOMICS_ENABLED(): # <<<<<<<<<<<<<< - * global __pyx_memoryview_thread_locks_used - * if __pyx_memoryview_thread_locks_used < 8: - */ - } - - /* "View.MemoryView":368 - * raise MemoryError - * - * if flags & PyBUF_FORMAT: # <<<<<<<<<<<<<< - * self.dtype_is_object = (self.view.format[0] == b'O' and self.view.format[1] == b'\0') - * else: - */ - __pyx_t_1 = ((__pyx_v_flags & PyBUF_FORMAT) != 0); - if (__pyx_t_1) { - - /* "View.MemoryView":369 - * - * if flags & PyBUF_FORMAT: - * self.dtype_is_object = (self.view.format[0] == b'O' and self.view.format[1] == b'\0') # <<<<<<<<<<<<<< - * else: - * self.dtype_is_object = dtype_is_object - */ - __pyx_t_2 = ((__pyx_v_self->view.format[0]) == 'O'); - if (__pyx_t_2) { - } else { - __pyx_t_1 = __pyx_t_2; - goto __pyx_L12_bool_binop_done; - } - __pyx_t_2 = ((__pyx_v_self->view.format[1]) == '\x00'); - __pyx_t_1 = __pyx_t_2; - __pyx_L12_bool_binop_done:; - __pyx_v_self->dtype_is_object = __pyx_t_1; - - /* "View.MemoryView":368 - * raise MemoryError - * - * if flags & PyBUF_FORMAT: # <<<<<<<<<<<<<< - * self.dtype_is_object = (self.view.format[0] == b'O' and self.view.format[1] == b'\0') - * else: - */ - goto __pyx_L11; - } - - /* "View.MemoryView":371 - * self.dtype_is_object = (self.view.format[0] == b'O' and self.view.format[1] == b'\0') - * else: - * self.dtype_is_object = dtype_is_object # <<<<<<<<<<<<<< - * - * assert (&self.acquisition_count) % sizeof(__pyx_atomic_int_type) == 0 - */ - /*else*/ { - __pyx_v_self->dtype_is_object = __pyx_v_dtype_is_object; - } - __pyx_L11:; - - /* "View.MemoryView":373 - * self.dtype_is_object = dtype_is_object - * - * assert (&self.acquisition_count) % sizeof(__pyx_atomic_int_type) == 0 # <<<<<<<<<<<<<< - * self.typeinfo = NULL - * - */ - #ifndef CYTHON_WITHOUT_ASSERTIONS - if (unlikely(__pyx_assertions_enabled())) { - __pyx_t_4 = ((Py_intptr_t)((void *)(&__pyx_v_self->acquisition_count))); - __pyx_t_5 = (sizeof(__pyx_atomic_int_type)); - if (unlikely(__pyx_t_5 == 0)) { - PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero"); - __PYX_ERR(1, 373, __pyx_L1_error) - } - __pyx_t_1 = ((__pyx_t_4 % __pyx_t_5) == 0); - if (unlikely(!__pyx_t_1)) { - __Pyx_Raise(__pyx_builtin_AssertionError, 0, 0, 0); - __PYX_ERR(1, 373, __pyx_L1_error) - } - } - #else - if ((1)); else __PYX_ERR(1, 373, __pyx_L1_error) - #endif - - /* "View.MemoryView":374 - * - * assert (&self.acquisition_count) % sizeof(__pyx_atomic_int_type) == 0 - * self.typeinfo = NULL # <<<<<<<<<<<<<< - * - * def __dealloc__(memoryview self): - */ - __pyx_v_self->typeinfo = NULL; - - /* "View.MemoryView":349 - * cdef __Pyx_TypeInfo *typeinfo - * - * def __cinit__(memoryview self, object obj, int flags, bint dtype_is_object=False): # <<<<<<<<<<<<<< - * self.obj = obj - * self.flags = flags - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("View.MemoryView.memoryview.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":376 - * self.typeinfo = NULL - * - * def __dealloc__(memoryview self): # <<<<<<<<<<<<<< - * if self.obj is not None: - * __Pyx_ReleaseBuffer(&self.view) - */ - -/* Python wrapper */ -static void __pyx_memoryview___dealloc__(PyObject *__pyx_v_self); /*proto*/ -static void __pyx_memoryview___dealloc__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__dealloc__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__dealloc__(((struct __pyx_memoryview_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); -} - -static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__dealloc__(struct __pyx_memoryview_obj *__pyx_v_self) { - int __pyx_v_i; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - PyThread_type_lock __pyx_t_5; - PyThread_type_lock __pyx_t_6; - __Pyx_RefNannySetupContext("__dealloc__", 0); - - /* "View.MemoryView":377 - * - * def __dealloc__(memoryview self): - * if self.obj is not None: # <<<<<<<<<<<<<< - * __Pyx_ReleaseBuffer(&self.view) - * elif (<__pyx_buffer *> &self.view).obj == Py_None: - */ - __pyx_t_1 = (__pyx_v_self->obj != Py_None); - if (__pyx_t_1) { - - /* "View.MemoryView":378 - * def __dealloc__(memoryview self): - * if self.obj is not None: - * __Pyx_ReleaseBuffer(&self.view) # <<<<<<<<<<<<<< - * elif (<__pyx_buffer *> &self.view).obj == Py_None: - * - */ - __Pyx_ReleaseBuffer((&__pyx_v_self->view)); - - /* "View.MemoryView":377 - * - * def __dealloc__(memoryview self): - * if self.obj is not None: # <<<<<<<<<<<<<< - * __Pyx_ReleaseBuffer(&self.view) - * elif (<__pyx_buffer *> &self.view).obj == Py_None: - */ - goto __pyx_L3; - } - - /* "View.MemoryView":379 - * if self.obj is not None: - * __Pyx_ReleaseBuffer(&self.view) - * elif (<__pyx_buffer *> &self.view).obj == Py_None: # <<<<<<<<<<<<<< - * - * (<__pyx_buffer *> &self.view).obj = NULL - */ - __pyx_t_1 = (((Py_buffer *)(&__pyx_v_self->view))->obj == Py_None); - if (__pyx_t_1) { - - /* "View.MemoryView":381 - * elif (<__pyx_buffer *> &self.view).obj == Py_None: - * - * (<__pyx_buffer *> &self.view).obj = NULL # <<<<<<<<<<<<<< - * Py_DECREF(Py_None) - * - */ - ((Py_buffer *)(&__pyx_v_self->view))->obj = NULL; - - /* "View.MemoryView":382 - * - * (<__pyx_buffer *> &self.view).obj = NULL - * Py_DECREF(Py_None) # <<<<<<<<<<<<<< - * - * cdef int i - */ - Py_DECREF(Py_None); - - /* "View.MemoryView":379 - * if self.obj is not None: - * __Pyx_ReleaseBuffer(&self.view) - * elif (<__pyx_buffer *> &self.view).obj == Py_None: # <<<<<<<<<<<<<< - * - * (<__pyx_buffer *> &self.view).obj = NULL - */ - } - __pyx_L3:; - - /* "View.MemoryView":386 - * cdef int i - * global __pyx_memoryview_thread_locks_used - * if self.lock != NULL: # <<<<<<<<<<<<<< - * for i in range(__pyx_memoryview_thread_locks_used): - * if __pyx_memoryview_thread_locks[i] is self.lock: - */ - __pyx_t_1 = (__pyx_v_self->lock != NULL); - if (__pyx_t_1) { - - /* "View.MemoryView":387 - * global __pyx_memoryview_thread_locks_used - * if self.lock != NULL: - * for i in range(__pyx_memoryview_thread_locks_used): # <<<<<<<<<<<<<< - * if __pyx_memoryview_thread_locks[i] is self.lock: - * __pyx_memoryview_thread_locks_used -= 1 - */ - __pyx_t_2 = __pyx_memoryview_thread_locks_used; - __pyx_t_3 = __pyx_t_2; - for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { - __pyx_v_i = __pyx_t_4; - - /* "View.MemoryView":388 - * if self.lock != NULL: - * for i in range(__pyx_memoryview_thread_locks_used): - * if __pyx_memoryview_thread_locks[i] is self.lock: # <<<<<<<<<<<<<< - * __pyx_memoryview_thread_locks_used -= 1 - * if i != __pyx_memoryview_thread_locks_used: - */ - __pyx_t_1 = ((__pyx_memoryview_thread_locks[__pyx_v_i]) == __pyx_v_self->lock); - if (__pyx_t_1) { - - /* "View.MemoryView":389 - * for i in range(__pyx_memoryview_thread_locks_used): - * if __pyx_memoryview_thread_locks[i] is self.lock: - * __pyx_memoryview_thread_locks_used -= 1 # <<<<<<<<<<<<<< - * if i != __pyx_memoryview_thread_locks_used: - * __pyx_memoryview_thread_locks[i], __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] = ( - */ - __pyx_memoryview_thread_locks_used = (__pyx_memoryview_thread_locks_used - 1); - - /* "View.MemoryView":390 - * if __pyx_memoryview_thread_locks[i] is self.lock: - * __pyx_memoryview_thread_locks_used -= 1 - * if i != __pyx_memoryview_thread_locks_used: # <<<<<<<<<<<<<< - * __pyx_memoryview_thread_locks[i], __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] = ( - * __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used], __pyx_memoryview_thread_locks[i]) - */ - __pyx_t_1 = (__pyx_v_i != __pyx_memoryview_thread_locks_used); - if (__pyx_t_1) { - - /* "View.MemoryView":392 - * if i != __pyx_memoryview_thread_locks_used: - * __pyx_memoryview_thread_locks[i], __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] = ( - * __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used], __pyx_memoryview_thread_locks[i]) # <<<<<<<<<<<<<< - * break - * else: - */ - __pyx_t_5 = (__pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used]); - __pyx_t_6 = (__pyx_memoryview_thread_locks[__pyx_v_i]); - - /* "View.MemoryView":391 - * __pyx_memoryview_thread_locks_used -= 1 - * if i != __pyx_memoryview_thread_locks_used: - * __pyx_memoryview_thread_locks[i], __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] = ( # <<<<<<<<<<<<<< - * __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used], __pyx_memoryview_thread_locks[i]) - * break - */ - (__pyx_memoryview_thread_locks[__pyx_v_i]) = __pyx_t_5; - (__pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used]) = __pyx_t_6; - - /* "View.MemoryView":390 - * if __pyx_memoryview_thread_locks[i] is self.lock: - * __pyx_memoryview_thread_locks_used -= 1 - * if i != __pyx_memoryview_thread_locks_used: # <<<<<<<<<<<<<< - * __pyx_memoryview_thread_locks[i], __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] = ( - * __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used], __pyx_memoryview_thread_locks[i]) - */ - } - - /* "View.MemoryView":393 - * __pyx_memoryview_thread_locks[i], __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] = ( - * __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used], __pyx_memoryview_thread_locks[i]) - * break # <<<<<<<<<<<<<< - * else: - * PyThread_free_lock(self.lock) - */ - goto __pyx_L6_break; - - /* "View.MemoryView":388 - * if self.lock != NULL: - * for i in range(__pyx_memoryview_thread_locks_used): - * if __pyx_memoryview_thread_locks[i] is self.lock: # <<<<<<<<<<<<<< - * __pyx_memoryview_thread_locks_used -= 1 - * if i != __pyx_memoryview_thread_locks_used: - */ - } - } - /*else*/ { - - /* "View.MemoryView":395 - * break - * else: - * PyThread_free_lock(self.lock) # <<<<<<<<<<<<<< - * - * cdef char *get_item_pointer(memoryview self, object index) except NULL: - */ - PyThread_free_lock(__pyx_v_self->lock); - } - __pyx_L6_break:; - - /* "View.MemoryView":386 - * cdef int i - * global __pyx_memoryview_thread_locks_used - * if self.lock != NULL: # <<<<<<<<<<<<<< - * for i in range(__pyx_memoryview_thread_locks_used): - * if __pyx_memoryview_thread_locks[i] is self.lock: - */ - } - - /* "View.MemoryView":376 - * self.typeinfo = NULL - * - * def __dealloc__(memoryview self): # <<<<<<<<<<<<<< - * if self.obj is not None: - * __Pyx_ReleaseBuffer(&self.view) - */ - - /* function exit code */ - __Pyx_RefNannyFinishContext(); -} - -/* "View.MemoryView":397 - * PyThread_free_lock(self.lock) - * - * cdef char *get_item_pointer(memoryview self, object index) except NULL: # <<<<<<<<<<<<<< - * cdef Py_ssize_t dim - * cdef char *itemp = self.view.buf - */ - -static char *__pyx_memoryview_get_item_pointer(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index) { - Py_ssize_t __pyx_v_dim; - char *__pyx_v_itemp; - PyObject *__pyx_v_idx = NULL; - char *__pyx_r; - __Pyx_RefNannyDeclarations - Py_ssize_t __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - Py_ssize_t __pyx_t_3; - PyObject *(*__pyx_t_4)(PyObject *); - PyObject *__pyx_t_5 = NULL; - Py_ssize_t __pyx_t_6; - char *__pyx_t_7; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("get_item_pointer", 0); - - /* "View.MemoryView":399 - * cdef char *get_item_pointer(memoryview self, object index) except NULL: - * cdef Py_ssize_t dim - * cdef char *itemp = self.view.buf # <<<<<<<<<<<<<< - * - * for dim, idx in enumerate(index): - */ - __pyx_v_itemp = ((char *)__pyx_v_self->view.buf); - - /* "View.MemoryView":401 - * cdef char *itemp = self.view.buf - * - * for dim, idx in enumerate(index): # <<<<<<<<<<<<<< - * itemp = pybuffer_index(&self.view, itemp, idx, dim) - * - */ - __pyx_t_1 = 0; - if (likely(PyList_CheckExact(__pyx_v_index)) || PyTuple_CheckExact(__pyx_v_index)) { - __pyx_t_2 = __pyx_v_index; __Pyx_INCREF(__pyx_t_2); __pyx_t_3 = 0; - __pyx_t_4 = NULL; - } else { - __pyx_t_3 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_v_index); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 401, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_4 = __Pyx_PyObject_GetIterNextFunc(__pyx_t_2); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 401, __pyx_L1_error) - } - for (;;) { - if (likely(!__pyx_t_4)) { - if (likely(PyList_CheckExact(__pyx_t_2))) { - if (__pyx_t_3 >= PyList_GET_SIZE(__pyx_t_2)) break; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_5 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_5); __pyx_t_3++; if (unlikely((0 < 0))) __PYX_ERR(1, 401, __pyx_L1_error) - #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 401, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - #endif - } else { - if (__pyx_t_3 >= PyTuple_GET_SIZE(__pyx_t_2)) break; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_5); __pyx_t_3++; if (unlikely((0 < 0))) __PYX_ERR(1, 401, __pyx_L1_error) - #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 401, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - #endif - } - } else { - __pyx_t_5 = __pyx_t_4(__pyx_t_2); - if (unlikely(!__pyx_t_5)) { - PyObject* exc_type = PyErr_Occurred(); - if (exc_type) { - if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(1, 401, __pyx_L1_error) - } - break; - } - __Pyx_GOTREF(__pyx_t_5); - } - __Pyx_XDECREF_SET(__pyx_v_idx, __pyx_t_5); - __pyx_t_5 = 0; - __pyx_v_dim = __pyx_t_1; - __pyx_t_1 = (__pyx_t_1 + 1); - - /* "View.MemoryView":402 - * - * for dim, idx in enumerate(index): - * itemp = pybuffer_index(&self.view, itemp, idx, dim) # <<<<<<<<<<<<<< - * - * return itemp - */ - __pyx_t_6 = __Pyx_PyIndex_AsSsize_t(__pyx_v_idx); if (unlikely((__pyx_t_6 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 402, __pyx_L1_error) - __pyx_t_7 = __pyx_pybuffer_index((&__pyx_v_self->view), __pyx_v_itemp, __pyx_t_6, __pyx_v_dim); if (unlikely(__pyx_t_7 == ((char *)NULL))) __PYX_ERR(1, 402, __pyx_L1_error) - __pyx_v_itemp = __pyx_t_7; - - /* "View.MemoryView":401 - * cdef char *itemp = self.view.buf - * - * for dim, idx in enumerate(index): # <<<<<<<<<<<<<< - * itemp = pybuffer_index(&self.view, itemp, idx, dim) - * - */ - } - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "View.MemoryView":404 - * itemp = pybuffer_index(&self.view, itemp, idx, dim) - * - * return itemp # <<<<<<<<<<<<<< - * - * - */ - __pyx_r = __pyx_v_itemp; - goto __pyx_L0; - - /* "View.MemoryView":397 - * PyThread_free_lock(self.lock) - * - * cdef char *get_item_pointer(memoryview self, object index) except NULL: # <<<<<<<<<<<<<< - * cdef Py_ssize_t dim - * cdef char *itemp = self.view.buf - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_5); - __Pyx_AddTraceback("View.MemoryView.memoryview.get_item_pointer", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_idx); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":407 - * - * - * def __getitem__(memoryview self, object index): # <<<<<<<<<<<<<< - * if index is Ellipsis: - * return self - */ - -/* Python wrapper */ -static PyObject *__pyx_memoryview___getitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_index); /*proto*/ -static PyObject *__pyx_memoryview___getitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_index) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__getitem__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_4__getitem__(((struct __pyx_memoryview_obj *)__pyx_v_self), ((PyObject *)__pyx_v_index)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_4__getitem__(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index) { - PyObject *__pyx_v_have_slices = NULL; - PyObject *__pyx_v_indices = NULL; - char *__pyx_v_itemp; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - char *__pyx_t_5; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__getitem__", 0); - - /* "View.MemoryView":408 - * - * def __getitem__(memoryview self, object index): - * if index is Ellipsis: # <<<<<<<<<<<<<< - * return self - * - */ - __pyx_t_1 = (__pyx_v_index == __pyx_builtin_Ellipsis); - if (__pyx_t_1) { - - /* "View.MemoryView":409 - * def __getitem__(memoryview self, object index): - * if index is Ellipsis: - * return self # <<<<<<<<<<<<<< - * - * have_slices, indices = _unellipsify(index, self.view.ndim) - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF((PyObject *)__pyx_v_self); - __pyx_r = ((PyObject *)__pyx_v_self); - goto __pyx_L0; - - /* "View.MemoryView":408 - * - * def __getitem__(memoryview self, object index): - * if index is Ellipsis: # <<<<<<<<<<<<<< - * return self - * - */ - } - - /* "View.MemoryView":411 - * return self - * - * have_slices, indices = _unellipsify(index, self.view.ndim) # <<<<<<<<<<<<<< - * - * cdef char *itemp - */ - __pyx_t_2 = _unellipsify(__pyx_v_index, __pyx_v_self->view.ndim); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 411, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - if (likely(__pyx_t_2 != Py_None)) { - PyObject* sequence = __pyx_t_2; - Py_ssize_t size = __Pyx_PySequence_SIZE(sequence); - if (unlikely(size != 2)) { - if (size > 2) __Pyx_RaiseTooManyValuesError(2); - else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - __PYX_ERR(1, 411, __pyx_L1_error) - } - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(sequence, 0); - __pyx_t_4 = PyTuple_GET_ITEM(sequence, 1); - __Pyx_INCREF(__pyx_t_3); - __Pyx_INCREF(__pyx_t_4); - #else - __pyx_t_3 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 411, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 411, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - #endif - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - } else { - __Pyx_RaiseNoneNotIterableError(); __PYX_ERR(1, 411, __pyx_L1_error) - } - __pyx_v_have_slices = __pyx_t_3; - __pyx_t_3 = 0; - __pyx_v_indices = __pyx_t_4; - __pyx_t_4 = 0; - - /* "View.MemoryView":414 - * - * cdef char *itemp - * if have_slices: # <<<<<<<<<<<<<< - * return memview_slice(self, indices) - * else: - */ - __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_have_slices); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(1, 414, __pyx_L1_error) - if (__pyx_t_1) { - - /* "View.MemoryView":415 - * cdef char *itemp - * if have_slices: - * return memview_slice(self, indices) # <<<<<<<<<<<<<< - * else: - * itemp = self.get_item_pointer(indices) - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = ((PyObject *)__pyx_memview_slice(__pyx_v_self, __pyx_v_indices)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 415, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "View.MemoryView":414 - * - * cdef char *itemp - * if have_slices: # <<<<<<<<<<<<<< - * return memview_slice(self, indices) - * else: - */ - } - - /* "View.MemoryView":417 - * return memview_slice(self, indices) - * else: - * itemp = self.get_item_pointer(indices) # <<<<<<<<<<<<<< - * return self.convert_item_to_object(itemp) - * - */ - /*else*/ { - __pyx_t_5 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->get_item_pointer(__pyx_v_self, __pyx_v_indices); if (unlikely(__pyx_t_5 == ((char *)NULL))) __PYX_ERR(1, 417, __pyx_L1_error) - __pyx_v_itemp = __pyx_t_5; - - /* "View.MemoryView":418 - * else: - * itemp = self.get_item_pointer(indices) - * return self.convert_item_to_object(itemp) # <<<<<<<<<<<<<< - * - * def __setitem__(memoryview self, object index, object value): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->convert_item_to_object(__pyx_v_self, __pyx_v_itemp); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 418, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - } - - /* "View.MemoryView":407 - * - * - * def __getitem__(memoryview self, object index): # <<<<<<<<<<<<<< - * if index is Ellipsis: - * return self - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_AddTraceback("View.MemoryView.memoryview.__getitem__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_have_slices); - __Pyx_XDECREF(__pyx_v_indices); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":420 - * return self.convert_item_to_object(itemp) - * - * def __setitem__(memoryview self, object index, object value): # <<<<<<<<<<<<<< - * if self.view.readonly: - * raise TypeError, "Cannot assign to read-only memoryview" - */ - -/* Python wrapper */ -static int __pyx_memoryview___setitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_index, PyObject *__pyx_v_value); /*proto*/ -static int __pyx_memoryview___setitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_index, PyObject *__pyx_v_value) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__setitem__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_6__setitem__(((struct __pyx_memoryview_obj *)__pyx_v_self), ((PyObject *)__pyx_v_index), ((PyObject *)__pyx_v_value)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_6__setitem__(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index, PyObject *__pyx_v_value) { - PyObject *__pyx_v_have_slices = NULL; - PyObject *__pyx_v_obj = NULL; - int __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - int __pyx_t_4; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__setitem__", 0); - __Pyx_INCREF(__pyx_v_index); - - /* "View.MemoryView":421 - * - * def __setitem__(memoryview self, object index, object value): - * if self.view.readonly: # <<<<<<<<<<<<<< - * raise TypeError, "Cannot assign to read-only memoryview" - * - */ - if (unlikely(__pyx_v_self->view.readonly)) { - - /* "View.MemoryView":422 - * def __setitem__(memoryview self, object index, object value): - * if self.view.readonly: - * raise TypeError, "Cannot assign to read-only memoryview" # <<<<<<<<<<<<<< - * - * have_slices, index = _unellipsify(index, self.view.ndim) - */ - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_Cannot_assign_to_read_only_memor, 0, 0); - __PYX_ERR(1, 422, __pyx_L1_error) - - /* "View.MemoryView":421 - * - * def __setitem__(memoryview self, object index, object value): - * if self.view.readonly: # <<<<<<<<<<<<<< - * raise TypeError, "Cannot assign to read-only memoryview" - * - */ - } - - /* "View.MemoryView":424 - * raise TypeError, "Cannot assign to read-only memoryview" - * - * have_slices, index = _unellipsify(index, self.view.ndim) # <<<<<<<<<<<<<< - * - * if have_slices: - */ - __pyx_t_1 = _unellipsify(__pyx_v_index, __pyx_v_self->view.ndim); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 424, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (likely(__pyx_t_1 != Py_None)) { - PyObject* sequence = __pyx_t_1; - Py_ssize_t size = __Pyx_PySequence_SIZE(sequence); - if (unlikely(size != 2)) { - if (size > 2) __Pyx_RaiseTooManyValuesError(2); - else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - __PYX_ERR(1, 424, __pyx_L1_error) - } - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_2 = PyTuple_GET_ITEM(sequence, 0); - __pyx_t_3 = PyTuple_GET_ITEM(sequence, 1); - __Pyx_INCREF(__pyx_t_2); - __Pyx_INCREF(__pyx_t_3); - #else - __pyx_t_2 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 424, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 424, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - #endif - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - } else { - __Pyx_RaiseNoneNotIterableError(); __PYX_ERR(1, 424, __pyx_L1_error) - } - __pyx_v_have_slices = __pyx_t_2; - __pyx_t_2 = 0; - __Pyx_DECREF_SET(__pyx_v_index, __pyx_t_3); - __pyx_t_3 = 0; - - /* "View.MemoryView":426 - * have_slices, index = _unellipsify(index, self.view.ndim) - * - * if have_slices: # <<<<<<<<<<<<<< - * obj = self.is_slice(value) - * if obj: - */ - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_v_have_slices); if (unlikely((__pyx_t_4 < 0))) __PYX_ERR(1, 426, __pyx_L1_error) - if (__pyx_t_4) { - - /* "View.MemoryView":427 - * - * if have_slices: - * obj = self.is_slice(value) # <<<<<<<<<<<<<< - * if obj: - * self.setitem_slice_assignment(self[index], obj) - */ - __pyx_t_1 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->is_slice(__pyx_v_self, __pyx_v_value); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 427, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_v_obj = __pyx_t_1; - __pyx_t_1 = 0; - - /* "View.MemoryView":428 - * if have_slices: - * obj = self.is_slice(value) - * if obj: # <<<<<<<<<<<<<< - * self.setitem_slice_assignment(self[index], obj) - * else: - */ - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_v_obj); if (unlikely((__pyx_t_4 < 0))) __PYX_ERR(1, 428, __pyx_L1_error) - if (__pyx_t_4) { - - /* "View.MemoryView":429 - * obj = self.is_slice(value) - * if obj: - * self.setitem_slice_assignment(self[index], obj) # <<<<<<<<<<<<<< - * else: - * self.setitem_slice_assign_scalar(self[index], value) - */ - __pyx_t_1 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_self), __pyx_v_index); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 429, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->setitem_slice_assignment(__pyx_v_self, __pyx_t_1, __pyx_v_obj); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 429, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - - /* "View.MemoryView":428 - * if have_slices: - * obj = self.is_slice(value) - * if obj: # <<<<<<<<<<<<<< - * self.setitem_slice_assignment(self[index], obj) - * else: - */ - goto __pyx_L5; - } - - /* "View.MemoryView":431 - * self.setitem_slice_assignment(self[index], obj) - * else: - * self.setitem_slice_assign_scalar(self[index], value) # <<<<<<<<<<<<<< - * else: - * self.setitem_indexed(index, value) - */ - /*else*/ { - __pyx_t_3 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_self), __pyx_v_index); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 431, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_memoryview_type))))) __PYX_ERR(1, 431, __pyx_L1_error) - __pyx_t_1 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->setitem_slice_assign_scalar(__pyx_v_self, ((struct __pyx_memoryview_obj *)__pyx_t_3), __pyx_v_value); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 431, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - } - __pyx_L5:; - - /* "View.MemoryView":426 - * have_slices, index = _unellipsify(index, self.view.ndim) - * - * if have_slices: # <<<<<<<<<<<<<< - * obj = self.is_slice(value) - * if obj: - */ - goto __pyx_L4; - } - - /* "View.MemoryView":433 - * self.setitem_slice_assign_scalar(self[index], value) - * else: - * self.setitem_indexed(index, value) # <<<<<<<<<<<<<< - * - * cdef is_slice(self, obj): - */ - /*else*/ { - __pyx_t_1 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->setitem_indexed(__pyx_v_self, __pyx_v_index, __pyx_v_value); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 433, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - } - __pyx_L4:; - - /* "View.MemoryView":420 - * return self.convert_item_to_object(itemp) - * - * def __setitem__(memoryview self, object index, object value): # <<<<<<<<<<<<<< - * if self.view.readonly: - * raise TypeError, "Cannot assign to read-only memoryview" - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("View.MemoryView.memoryview.__setitem__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_have_slices); - __Pyx_XDECREF(__pyx_v_obj); - __Pyx_XDECREF(__pyx_v_index); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":435 - * self.setitem_indexed(index, value) - * - * cdef is_slice(self, obj): # <<<<<<<<<<<<<< - * if not isinstance(obj, memoryview): - * try: - */ - -static PyObject *__pyx_memoryview_is_slice(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_obj) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - int __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - PyObject *__pyx_t_5 = NULL; - PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - PyObject *__pyx_t_8 = NULL; - int __pyx_t_9; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("is_slice", 0); - __Pyx_INCREF(__pyx_v_obj); - - /* "View.MemoryView":436 - * - * cdef is_slice(self, obj): - * if not isinstance(obj, memoryview): # <<<<<<<<<<<<<< - * try: - * obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS, - */ - __pyx_t_1 = __Pyx_TypeCheck(__pyx_v_obj, __pyx_memoryview_type); - __pyx_t_2 = (!__pyx_t_1); - if (__pyx_t_2) { - - /* "View.MemoryView":437 - * cdef is_slice(self, obj): - * if not isinstance(obj, memoryview): - * try: # <<<<<<<<<<<<<< - * obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS, - * self.dtype_is_object) - */ - { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __Pyx_ExceptionSave(&__pyx_t_3, &__pyx_t_4, &__pyx_t_5); - __Pyx_XGOTREF(__pyx_t_3); - __Pyx_XGOTREF(__pyx_t_4); - __Pyx_XGOTREF(__pyx_t_5); - /*try:*/ { - - /* "View.MemoryView":438 - * if not isinstance(obj, memoryview): - * try: - * obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS, # <<<<<<<<<<<<<< - * self.dtype_is_object) - * except TypeError: - */ - __pyx_t_6 = __Pyx_PyInt_From_int(((__pyx_v_self->flags & (~PyBUF_WRITABLE)) | PyBUF_ANY_CONTIGUOUS)); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 438, __pyx_L4_error) - __Pyx_GOTREF(__pyx_t_6); - - /* "View.MemoryView":439 - * try: - * obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS, - * self.dtype_is_object) # <<<<<<<<<<<<<< - * except TypeError: - * return None - */ - __pyx_t_7 = __Pyx_PyBool_FromLong(__pyx_v_self->dtype_is_object); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 439, __pyx_L4_error) - __Pyx_GOTREF(__pyx_t_7); - - /* "View.MemoryView":438 - * if not isinstance(obj, memoryview): - * try: - * obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS, # <<<<<<<<<<<<<< - * self.dtype_is_object) - * except TypeError: - */ - __pyx_t_8 = PyTuple_New(3); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 438, __pyx_L4_error) - __Pyx_GOTREF(__pyx_t_8); - __Pyx_INCREF(__pyx_v_obj); - __Pyx_GIVEREF(__pyx_v_obj); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_v_obj)) __PYX_ERR(1, 438, __pyx_L4_error); - __Pyx_GIVEREF(__pyx_t_6); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_8, 1, __pyx_t_6)) __PYX_ERR(1, 438, __pyx_L4_error); - __Pyx_GIVEREF(__pyx_t_7); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_8, 2, __pyx_t_7)) __PYX_ERR(1, 438, __pyx_L4_error); - __pyx_t_6 = 0; - __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_Call(((PyObject *)__pyx_memoryview_type), __pyx_t_8, NULL); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 438, __pyx_L4_error) - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __Pyx_DECREF_SET(__pyx_v_obj, __pyx_t_7); - __pyx_t_7 = 0; - - /* "View.MemoryView":437 - * cdef is_slice(self, obj): - * if not isinstance(obj, memoryview): - * try: # <<<<<<<<<<<<<< - * obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS, - * self.dtype_is_object) - */ - } - __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; - goto __pyx_L9_try_end; - __pyx_L4_error:; - __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; - __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; - __Pyx_XDECREF(__pyx_t_8); __pyx_t_8 = 0; - - /* "View.MemoryView":440 - * obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS, - * self.dtype_is_object) - * except TypeError: # <<<<<<<<<<<<<< - * return None - * - */ - __pyx_t_9 = __Pyx_PyErr_ExceptionMatches(__pyx_builtin_TypeError); - if (__pyx_t_9) { - __Pyx_AddTraceback("View.MemoryView.memoryview.is_slice", __pyx_clineno, __pyx_lineno, __pyx_filename); - if (__Pyx_GetException(&__pyx_t_7, &__pyx_t_8, &__pyx_t_6) < 0) __PYX_ERR(1, 440, __pyx_L6_except_error) - __Pyx_XGOTREF(__pyx_t_7); - __Pyx_XGOTREF(__pyx_t_8); - __Pyx_XGOTREF(__pyx_t_6); - - /* "View.MemoryView":441 - * self.dtype_is_object) - * except TypeError: - * return None # <<<<<<<<<<<<<< - * - * return obj - */ - __Pyx_XDECREF(__pyx_r); - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - goto __pyx_L7_except_return; - } - goto __pyx_L6_except_error; - - /* "View.MemoryView":437 - * cdef is_slice(self, obj): - * if not isinstance(obj, memoryview): - * try: # <<<<<<<<<<<<<< - * obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS, - * self.dtype_is_object) - */ - __pyx_L6_except_error:; - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_XGIVEREF(__pyx_t_4); - __Pyx_XGIVEREF(__pyx_t_5); - __Pyx_ExceptionReset(__pyx_t_3, __pyx_t_4, __pyx_t_5); - goto __pyx_L1_error; - __pyx_L7_except_return:; - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_XGIVEREF(__pyx_t_4); - __Pyx_XGIVEREF(__pyx_t_5); - __Pyx_ExceptionReset(__pyx_t_3, __pyx_t_4, __pyx_t_5); - goto __pyx_L0; - __pyx_L9_try_end:; - } - - /* "View.MemoryView":436 - * - * cdef is_slice(self, obj): - * if not isinstance(obj, memoryview): # <<<<<<<<<<<<<< - * try: - * obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS, - */ - } - - /* "View.MemoryView":443 - * return None - * - * return obj # <<<<<<<<<<<<<< - * - * cdef setitem_slice_assignment(self, dst, src): - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_obj); - __pyx_r = __pyx_v_obj; - goto __pyx_L0; - - /* "View.MemoryView":435 - * self.setitem_indexed(index, value) - * - * cdef is_slice(self, obj): # <<<<<<<<<<<<<< - * if not isinstance(obj, memoryview): - * try: - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_XDECREF(__pyx_t_8); - __Pyx_AddTraceback("View.MemoryView.memoryview.is_slice", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_obj); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":445 - * return obj - * - * cdef setitem_slice_assignment(self, dst, src): # <<<<<<<<<<<<<< - * cdef __Pyx_memviewslice dst_slice - * cdef __Pyx_memviewslice src_slice - */ - -static PyObject *__pyx_memoryview_setitem_slice_assignment(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_dst, PyObject *__pyx_v_src) { - __Pyx_memviewslice __pyx_v_dst_slice; - __Pyx_memviewslice __pyx_v_src_slice; - __Pyx_memviewslice __pyx_v_msrc; - __Pyx_memviewslice __pyx_v_mdst; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - __Pyx_memviewslice *__pyx_t_1; - PyObject *__pyx_t_2 = NULL; - int __pyx_t_3; - int __pyx_t_4; - int __pyx_t_5; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("setitem_slice_assignment", 0); - - /* "View.MemoryView":448 - * cdef __Pyx_memviewslice dst_slice - * cdef __Pyx_memviewslice src_slice - * cdef __Pyx_memviewslice msrc = get_slice_from_memview(src, &src_slice)[0] # <<<<<<<<<<<<<< - * cdef __Pyx_memviewslice mdst = get_slice_from_memview(dst, &dst_slice)[0] - * - */ - if (!(likely(((__pyx_v_src) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_src, __pyx_memoryview_type))))) __PYX_ERR(1, 448, __pyx_L1_error) - __pyx_t_1 = __pyx_memoryview_get_slice_from_memoryview(((struct __pyx_memoryview_obj *)__pyx_v_src), (&__pyx_v_src_slice)); if (unlikely(__pyx_t_1 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(1, 448, __pyx_L1_error) - __pyx_v_msrc = (__pyx_t_1[0]); - - /* "View.MemoryView":449 - * cdef __Pyx_memviewslice src_slice - * cdef __Pyx_memviewslice msrc = get_slice_from_memview(src, &src_slice)[0] - * cdef __Pyx_memviewslice mdst = get_slice_from_memview(dst, &dst_slice)[0] # <<<<<<<<<<<<<< - * - * memoryview_copy_contents(msrc, mdst, src.ndim, dst.ndim, self.dtype_is_object) - */ - if (!(likely(((__pyx_v_dst) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_dst, __pyx_memoryview_type))))) __PYX_ERR(1, 449, __pyx_L1_error) - __pyx_t_1 = __pyx_memoryview_get_slice_from_memoryview(((struct __pyx_memoryview_obj *)__pyx_v_dst), (&__pyx_v_dst_slice)); if (unlikely(__pyx_t_1 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(1, 449, __pyx_L1_error) - __pyx_v_mdst = (__pyx_t_1[0]); - - /* "View.MemoryView":451 - * cdef __Pyx_memviewslice mdst = get_slice_from_memview(dst, &dst_slice)[0] - * - * memoryview_copy_contents(msrc, mdst, src.ndim, dst.ndim, self.dtype_is_object) # <<<<<<<<<<<<<< - * - * cdef setitem_slice_assign_scalar(self, memoryview dst, value): - */ - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_src, __pyx_n_s_ndim); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 451, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = __Pyx_PyInt_As_int(__pyx_t_2); if (unlikely((__pyx_t_3 == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 451, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_dst, __pyx_n_s_ndim); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 451, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_4 = __Pyx_PyInt_As_int(__pyx_t_2); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 451, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_5 = __pyx_memoryview_copy_contents(__pyx_v_msrc, __pyx_v_mdst, __pyx_t_3, __pyx_t_4, __pyx_v_self->dtype_is_object); if (unlikely(__pyx_t_5 == ((int)-1))) __PYX_ERR(1, 451, __pyx_L1_error) - - /* "View.MemoryView":445 - * return obj - * - * cdef setitem_slice_assignment(self, dst, src): # <<<<<<<<<<<<<< - * cdef __Pyx_memviewslice dst_slice - * cdef __Pyx_memviewslice src_slice - */ - - /* function exit code */ - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("View.MemoryView.memoryview.setitem_slice_assignment", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":453 - * memoryview_copy_contents(msrc, mdst, src.ndim, dst.ndim, self.dtype_is_object) - * - * cdef setitem_slice_assign_scalar(self, memoryview dst, value): # <<<<<<<<<<<<<< - * cdef int array[128] - * cdef void *tmp = NULL - */ - -static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memoryview_obj *__pyx_v_self, struct __pyx_memoryview_obj *__pyx_v_dst, PyObject *__pyx_v_value) { - int __pyx_v_array[0x80]; - void *__pyx_v_tmp; - void *__pyx_v_item; - __Pyx_memviewslice *__pyx_v_dst_slice; - __Pyx_memviewslice __pyx_v_tmp_slice; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - __Pyx_memviewslice *__pyx_t_1; - int __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - int __pyx_t_4; - int __pyx_t_5; - char const *__pyx_t_6; - PyObject *__pyx_t_7 = NULL; - PyObject *__pyx_t_8 = NULL; - PyObject *__pyx_t_9 = NULL; - PyObject *__pyx_t_10 = NULL; - PyObject *__pyx_t_11 = NULL; - PyObject *__pyx_t_12 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("setitem_slice_assign_scalar", 0); - - /* "View.MemoryView":455 - * cdef setitem_slice_assign_scalar(self, memoryview dst, value): - * cdef int array[128] - * cdef void *tmp = NULL # <<<<<<<<<<<<<< - * cdef void *item - * - */ - __pyx_v_tmp = NULL; - - /* "View.MemoryView":460 - * cdef __Pyx_memviewslice *dst_slice - * cdef __Pyx_memviewslice tmp_slice - * dst_slice = get_slice_from_memview(dst, &tmp_slice) # <<<<<<<<<<<<<< - * - * if self.view.itemsize > sizeof(array): - */ - __pyx_t_1 = __pyx_memoryview_get_slice_from_memoryview(__pyx_v_dst, (&__pyx_v_tmp_slice)); if (unlikely(__pyx_t_1 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(1, 460, __pyx_L1_error) - __pyx_v_dst_slice = __pyx_t_1; - - /* "View.MemoryView":462 - * dst_slice = get_slice_from_memview(dst, &tmp_slice) - * - * if self.view.itemsize > sizeof(array): # <<<<<<<<<<<<<< - * tmp = PyMem_Malloc(self.view.itemsize) - * if tmp == NULL: - */ - __pyx_t_2 = (((size_t)__pyx_v_self->view.itemsize) > (sizeof(__pyx_v_array))); - if (__pyx_t_2) { - - /* "View.MemoryView":463 - * - * if self.view.itemsize > sizeof(array): - * tmp = PyMem_Malloc(self.view.itemsize) # <<<<<<<<<<<<<< - * if tmp == NULL: - * raise MemoryError - */ - __pyx_v_tmp = PyMem_Malloc(__pyx_v_self->view.itemsize); - - /* "View.MemoryView":464 - * if self.view.itemsize > sizeof(array): - * tmp = PyMem_Malloc(self.view.itemsize) - * if tmp == NULL: # <<<<<<<<<<<<<< - * raise MemoryError - * item = tmp - */ - __pyx_t_2 = (__pyx_v_tmp == NULL); - if (unlikely(__pyx_t_2)) { - - /* "View.MemoryView":465 - * tmp = PyMem_Malloc(self.view.itemsize) - * if tmp == NULL: - * raise MemoryError # <<<<<<<<<<<<<< - * item = tmp - * else: - */ - PyErr_NoMemory(); __PYX_ERR(1, 465, __pyx_L1_error) - - /* "View.MemoryView":464 - * if self.view.itemsize > sizeof(array): - * tmp = PyMem_Malloc(self.view.itemsize) - * if tmp == NULL: # <<<<<<<<<<<<<< - * raise MemoryError - * item = tmp - */ - } - - /* "View.MemoryView":466 - * if tmp == NULL: - * raise MemoryError - * item = tmp # <<<<<<<<<<<<<< - * else: - * item = array - */ - __pyx_v_item = __pyx_v_tmp; - - /* "View.MemoryView":462 - * dst_slice = get_slice_from_memview(dst, &tmp_slice) - * - * if self.view.itemsize > sizeof(array): # <<<<<<<<<<<<<< - * tmp = PyMem_Malloc(self.view.itemsize) - * if tmp == NULL: - */ - goto __pyx_L3; - } - - /* "View.MemoryView":468 - * item = tmp - * else: - * item = array # <<<<<<<<<<<<<< - * - * try: - */ - /*else*/ { - __pyx_v_item = ((void *)__pyx_v_array); - } - __pyx_L3:; - - /* "View.MemoryView":470 - * item = array - * - * try: # <<<<<<<<<<<<<< - * if self.dtype_is_object: - * ( item)[0] = value - */ - /*try:*/ { - - /* "View.MemoryView":471 - * - * try: - * if self.dtype_is_object: # <<<<<<<<<<<<<< - * ( item)[0] = value - * else: - */ - if (__pyx_v_self->dtype_is_object) { - - /* "View.MemoryView":472 - * try: - * if self.dtype_is_object: - * ( item)[0] = value # <<<<<<<<<<<<<< - * else: - * self.assign_item_from_object( item, value) - */ - (((PyObject **)__pyx_v_item)[0]) = ((PyObject *)__pyx_v_value); - - /* "View.MemoryView":471 - * - * try: - * if self.dtype_is_object: # <<<<<<<<<<<<<< - * ( item)[0] = value - * else: - */ - goto __pyx_L8; - } - - /* "View.MemoryView":474 - * ( item)[0] = value - * else: - * self.assign_item_from_object( item, value) # <<<<<<<<<<<<<< - * - * - */ - /*else*/ { - __pyx_t_3 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->assign_item_from_object(__pyx_v_self, ((char *)__pyx_v_item), __pyx_v_value); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 474, __pyx_L6_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - } - __pyx_L8:; - - /* "View.MemoryView":478 - * - * - * if self.view.suboffsets != NULL: # <<<<<<<<<<<<<< - * assert_direct_dimensions(self.view.suboffsets, self.view.ndim) - * slice_assign_scalar(dst_slice, dst.view.ndim, self.view.itemsize, - */ - __pyx_t_2 = (__pyx_v_self->view.suboffsets != NULL); - if (__pyx_t_2) { - - /* "View.MemoryView":479 - * - * if self.view.suboffsets != NULL: - * assert_direct_dimensions(self.view.suboffsets, self.view.ndim) # <<<<<<<<<<<<<< - * slice_assign_scalar(dst_slice, dst.view.ndim, self.view.itemsize, - * item, self.dtype_is_object) - */ - __pyx_t_4 = assert_direct_dimensions(__pyx_v_self->view.suboffsets, __pyx_v_self->view.ndim); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 479, __pyx_L6_error) - - /* "View.MemoryView":478 - * - * - * if self.view.suboffsets != NULL: # <<<<<<<<<<<<<< - * assert_direct_dimensions(self.view.suboffsets, self.view.ndim) - * slice_assign_scalar(dst_slice, dst.view.ndim, self.view.itemsize, - */ - } - - /* "View.MemoryView":480 - * if self.view.suboffsets != NULL: - * assert_direct_dimensions(self.view.suboffsets, self.view.ndim) - * slice_assign_scalar(dst_slice, dst.view.ndim, self.view.itemsize, # <<<<<<<<<<<<<< - * item, self.dtype_is_object) - * finally: - */ - __pyx_memoryview_slice_assign_scalar(__pyx_v_dst_slice, __pyx_v_dst->view.ndim, __pyx_v_self->view.itemsize, __pyx_v_item, __pyx_v_self->dtype_is_object); - } - - /* "View.MemoryView":483 - * item, self.dtype_is_object) - * finally: - * PyMem_Free(tmp) # <<<<<<<<<<<<<< - * - * cdef setitem_indexed(self, index, value): - */ - /*finally:*/ { - /*normal exit:*/{ - PyMem_Free(__pyx_v_tmp); - goto __pyx_L7; - } - __pyx_L6_error:; - /*exception exit:*/{ - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __pyx_t_7 = 0; __pyx_t_8 = 0; __pyx_t_9 = 0; __pyx_t_10 = 0; __pyx_t_11 = 0; __pyx_t_12 = 0; - __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - if (PY_MAJOR_VERSION >= 3) __Pyx_ExceptionSwap(&__pyx_t_10, &__pyx_t_11, &__pyx_t_12); - if ((PY_MAJOR_VERSION < 3) || unlikely(__Pyx_GetException(&__pyx_t_7, &__pyx_t_8, &__pyx_t_9) < 0)) __Pyx_ErrFetch(&__pyx_t_7, &__pyx_t_8, &__pyx_t_9); - __Pyx_XGOTREF(__pyx_t_7); - __Pyx_XGOTREF(__pyx_t_8); - __Pyx_XGOTREF(__pyx_t_9); - __Pyx_XGOTREF(__pyx_t_10); - __Pyx_XGOTREF(__pyx_t_11); - __Pyx_XGOTREF(__pyx_t_12); - __pyx_t_4 = __pyx_lineno; __pyx_t_5 = __pyx_clineno; __pyx_t_6 = __pyx_filename; - { - PyMem_Free(__pyx_v_tmp); - } - if (PY_MAJOR_VERSION >= 3) { - __Pyx_XGIVEREF(__pyx_t_10); - __Pyx_XGIVEREF(__pyx_t_11); - __Pyx_XGIVEREF(__pyx_t_12); - __Pyx_ExceptionReset(__pyx_t_10, __pyx_t_11, __pyx_t_12); - } - __Pyx_XGIVEREF(__pyx_t_7); - __Pyx_XGIVEREF(__pyx_t_8); - __Pyx_XGIVEREF(__pyx_t_9); - __Pyx_ErrRestore(__pyx_t_7, __pyx_t_8, __pyx_t_9); - __pyx_t_7 = 0; __pyx_t_8 = 0; __pyx_t_9 = 0; __pyx_t_10 = 0; __pyx_t_11 = 0; __pyx_t_12 = 0; - __pyx_lineno = __pyx_t_4; __pyx_clineno = __pyx_t_5; __pyx_filename = __pyx_t_6; - goto __pyx_L1_error; - } - __pyx_L7:; - } - - /* "View.MemoryView":453 - * memoryview_copy_contents(msrc, mdst, src.ndim, dst.ndim, self.dtype_is_object) - * - * cdef setitem_slice_assign_scalar(self, memoryview dst, value): # <<<<<<<<<<<<<< - * cdef int array[128] - * cdef void *tmp = NULL - */ - - /* function exit code */ - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("View.MemoryView.memoryview.setitem_slice_assign_scalar", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":485 - * PyMem_Free(tmp) - * - * cdef setitem_indexed(self, index, value): # <<<<<<<<<<<<<< - * cdef char *itemp = self.get_item_pointer(index) - * self.assign_item_from_object(itemp, value) - */ - -static PyObject *__pyx_memoryview_setitem_indexed(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index, PyObject *__pyx_v_value) { - char *__pyx_v_itemp; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - char *__pyx_t_1; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("setitem_indexed", 0); - - /* "View.MemoryView":486 - * - * cdef setitem_indexed(self, index, value): - * cdef char *itemp = self.get_item_pointer(index) # <<<<<<<<<<<<<< - * self.assign_item_from_object(itemp, value) - * - */ - __pyx_t_1 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->get_item_pointer(__pyx_v_self, __pyx_v_index); if (unlikely(__pyx_t_1 == ((char *)NULL))) __PYX_ERR(1, 486, __pyx_L1_error) - __pyx_v_itemp = __pyx_t_1; - - /* "View.MemoryView":487 - * cdef setitem_indexed(self, index, value): - * cdef char *itemp = self.get_item_pointer(index) - * self.assign_item_from_object(itemp, value) # <<<<<<<<<<<<<< - * - * cdef convert_item_to_object(self, char *itemp): - */ - __pyx_t_2 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->assign_item_from_object(__pyx_v_self, __pyx_v_itemp, __pyx_v_value); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 487, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "View.MemoryView":485 - * PyMem_Free(tmp) - * - * cdef setitem_indexed(self, index, value): # <<<<<<<<<<<<<< - * cdef char *itemp = self.get_item_pointer(index) - * self.assign_item_from_object(itemp, value) - */ - - /* function exit code */ - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("View.MemoryView.memoryview.setitem_indexed", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":489 - * self.assign_item_from_object(itemp, value) - * - * cdef convert_item_to_object(self, char *itemp): # <<<<<<<<<<<<<< - * """Only used if instantiated manually by the user, or if Cython doesn't - * know how to convert the type""" - */ - -static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview_obj *__pyx_v_self, char *__pyx_v_itemp) { - PyObject *__pyx_v_struct = NULL; - PyObject *__pyx_v_bytesitem = 0; - PyObject *__pyx_v_result = NULL; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - PyObject *__pyx_t_5 = NULL; - PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - int __pyx_t_8; - Py_ssize_t __pyx_t_9; - int __pyx_t_10; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("convert_item_to_object", 0); - - /* "View.MemoryView":492 - * """Only used if instantiated manually by the user, or if Cython doesn't - * know how to convert the type""" - * import struct # <<<<<<<<<<<<<< - * cdef bytes bytesitem - * - */ - __pyx_t_1 = __Pyx_ImportDottedModule(__pyx_n_s_struct, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 492, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_v_struct = __pyx_t_1; - __pyx_t_1 = 0; - - /* "View.MemoryView":495 - * cdef bytes bytesitem - * - * bytesitem = itemp[:self.view.itemsize] # <<<<<<<<<<<<<< - * try: - * result = struct.unpack(self.view.format, bytesitem) - */ - __pyx_t_1 = __Pyx_PyBytes_FromStringAndSize(__pyx_v_itemp + 0, __pyx_v_self->view.itemsize - 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 495, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_v_bytesitem = ((PyObject*)__pyx_t_1); - __pyx_t_1 = 0; - - /* "View.MemoryView":496 - * - * bytesitem = itemp[:self.view.itemsize] - * try: # <<<<<<<<<<<<<< - * result = struct.unpack(self.view.format, bytesitem) - * except struct.error: - */ - { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __Pyx_ExceptionSave(&__pyx_t_2, &__pyx_t_3, &__pyx_t_4); - __Pyx_XGOTREF(__pyx_t_2); - __Pyx_XGOTREF(__pyx_t_3); - __Pyx_XGOTREF(__pyx_t_4); - /*try:*/ { - - /* "View.MemoryView":497 - * bytesitem = itemp[:self.view.itemsize] - * try: - * result = struct.unpack(self.view.format, bytesitem) # <<<<<<<<<<<<<< - * except struct.error: - * raise ValueError, "Unable to convert item to object" - */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_struct, __pyx_n_s_unpack); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 497, __pyx_L3_error) - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_self->view.format); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 497, __pyx_L3_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = NULL; - __pyx_t_8 = 0; - #if CYTHON_UNPACK_METHODS - if (likely(PyMethod_Check(__pyx_t_5))) { - __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_5); - if (likely(__pyx_t_7)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5); - __Pyx_INCREF(__pyx_t_7); - __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_5, function); - __pyx_t_8 = 1; - } - } - #endif - { - PyObject *__pyx_callargs[3] = {__pyx_t_7, __pyx_t_6, __pyx_v_bytesitem}; - __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_8, 2+__pyx_t_8); - __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 497, __pyx_L3_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - } - __pyx_v_result = __pyx_t_1; - __pyx_t_1 = 0; - - /* "View.MemoryView":496 - * - * bytesitem = itemp[:self.view.itemsize] - * try: # <<<<<<<<<<<<<< - * result = struct.unpack(self.view.format, bytesitem) - * except struct.error: - */ - } - - /* "View.MemoryView":501 - * raise ValueError, "Unable to convert item to object" - * else: - * if len(self.view.format) == 1: # <<<<<<<<<<<<<< - * return result[0] - * return result - */ - /*else:*/ { - __pyx_t_9 = __Pyx_ssize_strlen(__pyx_v_self->view.format); if (unlikely(__pyx_t_9 == ((Py_ssize_t)-1))) __PYX_ERR(1, 501, __pyx_L5_except_error) - __pyx_t_10 = (__pyx_t_9 == 1); - if (__pyx_t_10) { - - /* "View.MemoryView":502 - * else: - * if len(self.view.format) == 1: - * return result[0] # <<<<<<<<<<<<<< - * return result - * - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_result, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 502, __pyx_L5_except_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L6_except_return; - - /* "View.MemoryView":501 - * raise ValueError, "Unable to convert item to object" - * else: - * if len(self.view.format) == 1: # <<<<<<<<<<<<<< - * return result[0] - * return result - */ - } - - /* "View.MemoryView":503 - * if len(self.view.format) == 1: - * return result[0] - * return result # <<<<<<<<<<<<<< - * - * cdef assign_item_from_object(self, char *itemp, object value): - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_result); - __pyx_r = __pyx_v_result; - goto __pyx_L6_except_return; - } - __pyx_L3_error:; - __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; - __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; - __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; - - /* "View.MemoryView":498 - * try: - * result = struct.unpack(self.view.format, bytesitem) - * except struct.error: # <<<<<<<<<<<<<< - * raise ValueError, "Unable to convert item to object" - * else: - */ - __Pyx_ErrFetch(&__pyx_t_1, &__pyx_t_5, &__pyx_t_6); - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_struct, __pyx_n_s_error); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 498, __pyx_L5_except_error) - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_8 = __Pyx_PyErr_GivenExceptionMatches(__pyx_t_1, __pyx_t_7); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __Pyx_ErrRestore(__pyx_t_1, __pyx_t_5, __pyx_t_6); - __pyx_t_1 = 0; __pyx_t_5 = 0; __pyx_t_6 = 0; - if (__pyx_t_8) { - __Pyx_AddTraceback("View.MemoryView.memoryview.convert_item_to_object", __pyx_clineno, __pyx_lineno, __pyx_filename); - if (__Pyx_GetException(&__pyx_t_6, &__pyx_t_5, &__pyx_t_1) < 0) __PYX_ERR(1, 498, __pyx_L5_except_error) - __Pyx_XGOTREF(__pyx_t_6); - __Pyx_XGOTREF(__pyx_t_5); - __Pyx_XGOTREF(__pyx_t_1); - - /* "View.MemoryView":499 - * result = struct.unpack(self.view.format, bytesitem) - * except struct.error: - * raise ValueError, "Unable to convert item to object" # <<<<<<<<<<<<<< - * else: - * if len(self.view.format) == 1: - */ - __Pyx_Raise(__pyx_builtin_ValueError, __pyx_kp_s_Unable_to_convert_item_to_object, 0, 0); - __PYX_ERR(1, 499, __pyx_L5_except_error) - } - goto __pyx_L5_except_error; - - /* "View.MemoryView":496 - * - * bytesitem = itemp[:self.view.itemsize] - * try: # <<<<<<<<<<<<<< - * result = struct.unpack(self.view.format, bytesitem) - * except struct.error: - */ - __pyx_L5_except_error:; - __Pyx_XGIVEREF(__pyx_t_2); - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_XGIVEREF(__pyx_t_4); - __Pyx_ExceptionReset(__pyx_t_2, __pyx_t_3, __pyx_t_4); - goto __pyx_L1_error; - __pyx_L6_except_return:; - __Pyx_XGIVEREF(__pyx_t_2); - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_XGIVEREF(__pyx_t_4); - __Pyx_ExceptionReset(__pyx_t_2, __pyx_t_3, __pyx_t_4); - goto __pyx_L0; - } - - /* "View.MemoryView":489 - * self.assign_item_from_object(itemp, value) - * - * cdef convert_item_to_object(self, char *itemp): # <<<<<<<<<<<<<< - * """Only used if instantiated manually by the user, or if Cython doesn't - * know how to convert the type""" - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_5); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_AddTraceback("View.MemoryView.memoryview.convert_item_to_object", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_struct); - __Pyx_XDECREF(__pyx_v_bytesitem); - __Pyx_XDECREF(__pyx_v_result); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":505 - * return result - * - * cdef assign_item_from_object(self, char *itemp, object value): # <<<<<<<<<<<<<< - * """Only used if instantiated manually by the user, or if Cython doesn't - * know how to convert the type""" - */ - -static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryview_obj *__pyx_v_self, char *__pyx_v_itemp, PyObject *__pyx_v_value) { - PyObject *__pyx_v_struct = NULL; - char __pyx_v_c; - PyObject *__pyx_v_bytesvalue = 0; - Py_ssize_t __pyx_v_i; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - PyObject *__pyx_t_5 = NULL; - int __pyx_t_6; - Py_ssize_t __pyx_t_7; - PyObject *__pyx_t_8 = NULL; - char *__pyx_t_9; - char *__pyx_t_10; - char *__pyx_t_11; - char *__pyx_t_12; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("assign_item_from_object", 0); - - /* "View.MemoryView":508 - * """Only used if instantiated manually by the user, or if Cython doesn't - * know how to convert the type""" - * import struct # <<<<<<<<<<<<<< - * cdef char c - * cdef bytes bytesvalue - */ - __pyx_t_1 = __Pyx_ImportDottedModule(__pyx_n_s_struct, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 508, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_v_struct = __pyx_t_1; - __pyx_t_1 = 0; - - /* "View.MemoryView":513 - * cdef Py_ssize_t i - * - * if isinstance(value, tuple): # <<<<<<<<<<<<<< - * bytesvalue = struct.pack(self.view.format, *value) - * else: - */ - __pyx_t_2 = PyTuple_Check(__pyx_v_value); - if (__pyx_t_2) { - - /* "View.MemoryView":514 - * - * if isinstance(value, tuple): - * bytesvalue = struct.pack(self.view.format, *value) # <<<<<<<<<<<<<< - * else: - * bytesvalue = struct.pack(self.view.format, value) - */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_struct, __pyx_n_s_pack); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 514, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = __Pyx_PyBytes_FromString(__pyx_v_self->view.format); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 514, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 514, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_GIVEREF(__pyx_t_3); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_3)) __PYX_ERR(1, 514, __pyx_L1_error); - __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PySequence_Tuple(__pyx_v_value); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 514, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = PyNumber_Add(__pyx_t_4, __pyx_t_3); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 514, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_5, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 514, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (!(likely(PyBytes_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None) || __Pyx_RaiseUnexpectedTypeError("bytes", __pyx_t_3))) __PYX_ERR(1, 514, __pyx_L1_error) - __pyx_v_bytesvalue = ((PyObject*)__pyx_t_3); - __pyx_t_3 = 0; - - /* "View.MemoryView":513 - * cdef Py_ssize_t i - * - * if isinstance(value, tuple): # <<<<<<<<<<<<<< - * bytesvalue = struct.pack(self.view.format, *value) - * else: - */ - goto __pyx_L3; - } - - /* "View.MemoryView":516 - * bytesvalue = struct.pack(self.view.format, *value) - * else: - * bytesvalue = struct.pack(self.view.format, value) # <<<<<<<<<<<<<< - * - * for i, c in enumerate(bytesvalue): - */ - /*else*/ { - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_struct, __pyx_n_s_pack); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 516, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_1 = __Pyx_PyBytes_FromString(__pyx_v_self->view.format); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 516, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = NULL; - __pyx_t_6 = 0; - #if CYTHON_UNPACK_METHODS - if (likely(PyMethod_Check(__pyx_t_5))) { - __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_5); - if (likely(__pyx_t_4)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5); - __Pyx_INCREF(__pyx_t_4); - __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_5, function); - __pyx_t_6 = 1; - } - } - #endif - { - PyObject *__pyx_callargs[3] = {__pyx_t_4, __pyx_t_1, __pyx_v_value}; - __pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_5, __pyx_callargs+1-__pyx_t_6, 2+__pyx_t_6); - __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 516, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - } - if (!(likely(PyBytes_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None) || __Pyx_RaiseUnexpectedTypeError("bytes", __pyx_t_3))) __PYX_ERR(1, 516, __pyx_L1_error) - __pyx_v_bytesvalue = ((PyObject*)__pyx_t_3); - __pyx_t_3 = 0; - } - __pyx_L3:; - - /* "View.MemoryView":518 - * bytesvalue = struct.pack(self.view.format, value) - * - * for i, c in enumerate(bytesvalue): # <<<<<<<<<<<<<< - * itemp[i] = c - * - */ - __pyx_t_7 = 0; - if (unlikely(__pyx_v_bytesvalue == Py_None)) { - PyErr_SetString(PyExc_TypeError, "'NoneType' is not iterable"); - __PYX_ERR(1, 518, __pyx_L1_error) - } - __Pyx_INCREF(__pyx_v_bytesvalue); - __pyx_t_8 = __pyx_v_bytesvalue; - __pyx_t_10 = PyBytes_AS_STRING(__pyx_t_8); - __pyx_t_11 = (__pyx_t_10 + PyBytes_GET_SIZE(__pyx_t_8)); - for (__pyx_t_12 = __pyx_t_10; __pyx_t_12 < __pyx_t_11; __pyx_t_12++) { - __pyx_t_9 = __pyx_t_12; - __pyx_v_c = (__pyx_t_9[0]); - - /* "View.MemoryView":519 - * - * for i, c in enumerate(bytesvalue): - * itemp[i] = c # <<<<<<<<<<<<<< - * - * @cname('getbuffer') - */ - __pyx_v_i = __pyx_t_7; - - /* "View.MemoryView":518 - * bytesvalue = struct.pack(self.view.format, value) - * - * for i, c in enumerate(bytesvalue): # <<<<<<<<<<<<<< - * itemp[i] = c - * - */ - __pyx_t_7 = (__pyx_t_7 + 1); - - /* "View.MemoryView":519 - * - * for i, c in enumerate(bytesvalue): - * itemp[i] = c # <<<<<<<<<<<<<< - * - * @cname('getbuffer') - */ - (__pyx_v_itemp[__pyx_v_i]) = __pyx_v_c; - } - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - - /* "View.MemoryView":505 - * return result - * - * cdef assign_item_from_object(self, char *itemp, object value): # <<<<<<<<<<<<<< - * """Only used if instantiated manually by the user, or if Cython doesn't - * know how to convert the type""" - */ - - /* function exit code */ - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_5); - __Pyx_XDECREF(__pyx_t_8); - __Pyx_AddTraceback("View.MemoryView.memoryview.assign_item_from_object", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_struct); - __Pyx_XDECREF(__pyx_v_bytesvalue); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":521 - * itemp[i] = c - * - * @cname('getbuffer') # <<<<<<<<<<<<<< - * def __getbuffer__(self, Py_buffer *info, int flags): - * if flags & PyBUF_WRITABLE and self.view.readonly: - */ - -/* Python wrapper */ -CYTHON_UNUSED static int __pyx_memoryview_getbuffer(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/ -CYTHON_UNUSED static int __pyx_memoryview_getbuffer(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__getbuffer__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbuffer__(((struct __pyx_memoryview_obj *)__pyx_v_self), ((Py_buffer *)__pyx_v_info), ((int)__pyx_v_flags)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbuffer__(struct __pyx_memoryview_obj *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) { - int __pyx_r; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - int __pyx_t_2; - Py_ssize_t *__pyx_t_3; - char *__pyx_t_4; - void *__pyx_t_5; - int __pyx_t_6; - Py_ssize_t __pyx_t_7; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - if (unlikely(__pyx_v_info == NULL)) { - PyErr_SetString(PyExc_BufferError, "PyObject_GetBuffer: view==NULL argument is obsolete"); - return -1; - } - __Pyx_RefNannySetupContext("__getbuffer__", 0); - __pyx_v_info->obj = Py_None; __Pyx_INCREF(Py_None); - __Pyx_GIVEREF(__pyx_v_info->obj); - - /* "View.MemoryView":523 - * @cname('getbuffer') - * def __getbuffer__(self, Py_buffer *info, int flags): - * if flags & PyBUF_WRITABLE and self.view.readonly: # <<<<<<<<<<<<<< - * raise ValueError, "Cannot create writable memory view from read-only memoryview" - * - */ - __pyx_t_2 = ((__pyx_v_flags & PyBUF_WRITABLE) != 0); - if (__pyx_t_2) { - } else { - __pyx_t_1 = __pyx_t_2; - goto __pyx_L4_bool_binop_done; - } - __pyx_t_1 = __pyx_v_self->view.readonly; - __pyx_L4_bool_binop_done:; - if (unlikely(__pyx_t_1)) { - - /* "View.MemoryView":524 - * def __getbuffer__(self, Py_buffer *info, int flags): - * if flags & PyBUF_WRITABLE and self.view.readonly: - * raise ValueError, "Cannot create writable memory view from read-only memoryview" # <<<<<<<<<<<<<< - * - * if flags & PyBUF_ND: - */ - __Pyx_Raise(__pyx_builtin_ValueError, __pyx_kp_s_Cannot_create_writable_memory_vi, 0, 0); - __PYX_ERR(1, 524, __pyx_L1_error) - - /* "View.MemoryView":523 - * @cname('getbuffer') - * def __getbuffer__(self, Py_buffer *info, int flags): - * if flags & PyBUF_WRITABLE and self.view.readonly: # <<<<<<<<<<<<<< - * raise ValueError, "Cannot create writable memory view from read-only memoryview" - * - */ - } - - /* "View.MemoryView":526 - * raise ValueError, "Cannot create writable memory view from read-only memoryview" - * - * if flags & PyBUF_ND: # <<<<<<<<<<<<<< - * info.shape = self.view.shape - * else: - */ - __pyx_t_1 = ((__pyx_v_flags & PyBUF_ND) != 0); - if (__pyx_t_1) { - - /* "View.MemoryView":527 - * - * if flags & PyBUF_ND: - * info.shape = self.view.shape # <<<<<<<<<<<<<< - * else: - * info.shape = NULL - */ - __pyx_t_3 = __pyx_v_self->view.shape; - __pyx_v_info->shape = __pyx_t_3; - - /* "View.MemoryView":526 - * raise ValueError, "Cannot create writable memory view from read-only memoryview" - * - * if flags & PyBUF_ND: # <<<<<<<<<<<<<< - * info.shape = self.view.shape - * else: - */ - goto __pyx_L6; - } - - /* "View.MemoryView":529 - * info.shape = self.view.shape - * else: - * info.shape = NULL # <<<<<<<<<<<<<< - * - * if flags & PyBUF_STRIDES: - */ - /*else*/ { - __pyx_v_info->shape = NULL; - } - __pyx_L6:; - - /* "View.MemoryView":531 - * info.shape = NULL - * - * if flags & PyBUF_STRIDES: # <<<<<<<<<<<<<< - * info.strides = self.view.strides - * else: - */ - __pyx_t_1 = ((__pyx_v_flags & PyBUF_STRIDES) != 0); - if (__pyx_t_1) { - - /* "View.MemoryView":532 - * - * if flags & PyBUF_STRIDES: - * info.strides = self.view.strides # <<<<<<<<<<<<<< - * else: - * info.strides = NULL - */ - __pyx_t_3 = __pyx_v_self->view.strides; - __pyx_v_info->strides = __pyx_t_3; - - /* "View.MemoryView":531 - * info.shape = NULL - * - * if flags & PyBUF_STRIDES: # <<<<<<<<<<<<<< - * info.strides = self.view.strides - * else: - */ - goto __pyx_L7; - } - - /* "View.MemoryView":534 - * info.strides = self.view.strides - * else: - * info.strides = NULL # <<<<<<<<<<<<<< - * - * if flags & PyBUF_INDIRECT: - */ - /*else*/ { - __pyx_v_info->strides = NULL; - } - __pyx_L7:; - - /* "View.MemoryView":536 - * info.strides = NULL - * - * if flags & PyBUF_INDIRECT: # <<<<<<<<<<<<<< - * info.suboffsets = self.view.suboffsets - * else: - */ - __pyx_t_1 = ((__pyx_v_flags & PyBUF_INDIRECT) != 0); - if (__pyx_t_1) { - - /* "View.MemoryView":537 - * - * if flags & PyBUF_INDIRECT: - * info.suboffsets = self.view.suboffsets # <<<<<<<<<<<<<< - * else: - * info.suboffsets = NULL - */ - __pyx_t_3 = __pyx_v_self->view.suboffsets; - __pyx_v_info->suboffsets = __pyx_t_3; - - /* "View.MemoryView":536 - * info.strides = NULL - * - * if flags & PyBUF_INDIRECT: # <<<<<<<<<<<<<< - * info.suboffsets = self.view.suboffsets - * else: - */ - goto __pyx_L8; - } - - /* "View.MemoryView":539 - * info.suboffsets = self.view.suboffsets - * else: - * info.suboffsets = NULL # <<<<<<<<<<<<<< - * - * if flags & PyBUF_FORMAT: - */ - /*else*/ { - __pyx_v_info->suboffsets = NULL; - } - __pyx_L8:; - - /* "View.MemoryView":541 - * info.suboffsets = NULL - * - * if flags & PyBUF_FORMAT: # <<<<<<<<<<<<<< - * info.format = self.view.format - * else: - */ - __pyx_t_1 = ((__pyx_v_flags & PyBUF_FORMAT) != 0); - if (__pyx_t_1) { - - /* "View.MemoryView":542 - * - * if flags & PyBUF_FORMAT: - * info.format = self.view.format # <<<<<<<<<<<<<< - * else: - * info.format = NULL - */ - __pyx_t_4 = __pyx_v_self->view.format; - __pyx_v_info->format = __pyx_t_4; - - /* "View.MemoryView":541 - * info.suboffsets = NULL - * - * if flags & PyBUF_FORMAT: # <<<<<<<<<<<<<< - * info.format = self.view.format - * else: - */ - goto __pyx_L9; - } - - /* "View.MemoryView":544 - * info.format = self.view.format - * else: - * info.format = NULL # <<<<<<<<<<<<<< - * - * info.buf = self.view.buf - */ - /*else*/ { - __pyx_v_info->format = NULL; - } - __pyx_L9:; - - /* "View.MemoryView":546 - * info.format = NULL - * - * info.buf = self.view.buf # <<<<<<<<<<<<<< - * info.ndim = self.view.ndim - * info.itemsize = self.view.itemsize - */ - __pyx_t_5 = __pyx_v_self->view.buf; - __pyx_v_info->buf = __pyx_t_5; - - /* "View.MemoryView":547 - * - * info.buf = self.view.buf - * info.ndim = self.view.ndim # <<<<<<<<<<<<<< - * info.itemsize = self.view.itemsize - * info.len = self.view.len - */ - __pyx_t_6 = __pyx_v_self->view.ndim; - __pyx_v_info->ndim = __pyx_t_6; - - /* "View.MemoryView":548 - * info.buf = self.view.buf - * info.ndim = self.view.ndim - * info.itemsize = self.view.itemsize # <<<<<<<<<<<<<< - * info.len = self.view.len - * info.readonly = self.view.readonly - */ - __pyx_t_7 = __pyx_v_self->view.itemsize; - __pyx_v_info->itemsize = __pyx_t_7; - - /* "View.MemoryView":549 - * info.ndim = self.view.ndim - * info.itemsize = self.view.itemsize - * info.len = self.view.len # <<<<<<<<<<<<<< - * info.readonly = self.view.readonly - * info.obj = self - */ - __pyx_t_7 = __pyx_v_self->view.len; - __pyx_v_info->len = __pyx_t_7; - - /* "View.MemoryView":550 - * info.itemsize = self.view.itemsize - * info.len = self.view.len - * info.readonly = self.view.readonly # <<<<<<<<<<<<<< - * info.obj = self - * - */ - __pyx_t_1 = __pyx_v_self->view.readonly; - __pyx_v_info->readonly = __pyx_t_1; - - /* "View.MemoryView":551 - * info.len = self.view.len - * info.readonly = self.view.readonly - * info.obj = self # <<<<<<<<<<<<<< - * - * - */ - __Pyx_INCREF((PyObject *)__pyx_v_self); - __Pyx_GIVEREF((PyObject *)__pyx_v_self); - __Pyx_GOTREF(__pyx_v_info->obj); - __Pyx_DECREF(__pyx_v_info->obj); - __pyx_v_info->obj = ((PyObject *)__pyx_v_self); - - /* "View.MemoryView":521 - * itemp[i] = c - * - * @cname('getbuffer') # <<<<<<<<<<<<<< - * def __getbuffer__(self, Py_buffer *info, int flags): - * if flags & PyBUF_WRITABLE and self.view.readonly: - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("View.MemoryView.memoryview.__getbuffer__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - if (__pyx_v_info->obj != NULL) { - __Pyx_GOTREF(__pyx_v_info->obj); - __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = 0; - } - goto __pyx_L2; - __pyx_L0:; - if (__pyx_v_info->obj == Py_None) { - __Pyx_GOTREF(__pyx_v_info->obj); - __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = 0; - } - __pyx_L2:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":554 - * - * - * @property # <<<<<<<<<<<<<< - * def T(self): - * cdef _memoryviewslice result = memoryview_copy(self) - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_1T_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_1T_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_15View_dot_MemoryView_10memoryview_1T___get__(((struct __pyx_memoryview_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_1T___get__(struct __pyx_memoryview_obj *__pyx_v_self) { - struct __pyx_memoryviewslice_obj *__pyx_v_result = 0; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_t_2; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 0); - - /* "View.MemoryView":556 - * @property - * def T(self): - * cdef _memoryviewslice result = memoryview_copy(self) # <<<<<<<<<<<<<< - * transpose_memslice(&result.from_slice) - * return result - */ - __pyx_t_1 = __pyx_memoryview_copy_object(__pyx_v_self); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 556, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_memoryviewslice_type))))) __PYX_ERR(1, 556, __pyx_L1_error) - __pyx_v_result = ((struct __pyx_memoryviewslice_obj *)__pyx_t_1); - __pyx_t_1 = 0; - - /* "View.MemoryView":557 - * def T(self): - * cdef _memoryviewslice result = memoryview_copy(self) - * transpose_memslice(&result.from_slice) # <<<<<<<<<<<<<< - * return result - * - */ - __pyx_t_2 = __pyx_memslice_transpose((&__pyx_v_result->from_slice)); if (unlikely(__pyx_t_2 == ((int)-1))) __PYX_ERR(1, 557, __pyx_L1_error) - - /* "View.MemoryView":558 - * cdef _memoryviewslice result = memoryview_copy(self) - * transpose_memslice(&result.from_slice) - * return result # <<<<<<<<<<<<<< - * - * @property - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF((PyObject *)__pyx_v_result); - __pyx_r = ((PyObject *)__pyx_v_result); - goto __pyx_L0; - - /* "View.MemoryView":554 - * - * - * @property # <<<<<<<<<<<<<< - * def T(self): - * cdef _memoryviewslice result = memoryview_copy(self) - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("View.MemoryView.memoryview.T.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XDECREF((PyObject *)__pyx_v_result); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":560 - * return result - * - * @property # <<<<<<<<<<<<<< - * def base(self): - * return self._get_base() - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_4base_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_4base_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_15View_dot_MemoryView_10memoryview_4base___get__(((struct __pyx_memoryview_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4base___get__(struct __pyx_memoryview_obj *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 0); - - /* "View.MemoryView":562 - * @property - * def base(self): - * return self._get_base() # <<<<<<<<<<<<<< - * - * cdef _get_base(self): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->_get_base(__pyx_v_self); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 562, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "View.MemoryView":560 - * return result - * - * @property # <<<<<<<<<<<<<< - * def base(self): - * return self._get_base() - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("View.MemoryView.memoryview.base.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":564 - * return self._get_base() - * - * cdef _get_base(self): # <<<<<<<<<<<<<< - * return self.obj - * - */ - -static PyObject *__pyx_memoryview__get_base(struct __pyx_memoryview_obj *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("_get_base", 0); - - /* "View.MemoryView":565 - * - * cdef _get_base(self): - * return self.obj # <<<<<<<<<<<<<< - * - * @property - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_self->obj); - __pyx_r = __pyx_v_self->obj; - goto __pyx_L0; - - /* "View.MemoryView":564 - * return self._get_base() - * - * cdef _get_base(self): # <<<<<<<<<<<<<< - * return self.obj - * - */ - - /* function exit code */ - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":567 - * return self.obj - * - * @property # <<<<<<<<<<<<<< - * def shape(self): - * return tuple([length for length in self.view.shape[:self.view.ndim]]) - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_5shape_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_5shape_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_15View_dot_MemoryView_10memoryview_5shape___get__(((struct __pyx_memoryview_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_5shape___get__(struct __pyx_memoryview_obj *__pyx_v_self) { - Py_ssize_t __pyx_7genexpr__pyx_v_length; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - Py_ssize_t *__pyx_t_2; - Py_ssize_t *__pyx_t_3; - Py_ssize_t *__pyx_t_4; - PyObject *__pyx_t_5 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 0); - - /* "View.MemoryView":569 - * @property - * def shape(self): - * return tuple([length for length in self.view.shape[:self.view.ndim]]) # <<<<<<<<<<<<<< - * - * @property - */ - __Pyx_XDECREF(__pyx_r); - { /* enter inner scope */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 569, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = (__pyx_v_self->view.shape + __pyx_v_self->view.ndim); - for (__pyx_t_4 = __pyx_v_self->view.shape; __pyx_t_4 < __pyx_t_3; __pyx_t_4++) { - __pyx_t_2 = __pyx_t_4; - __pyx_7genexpr__pyx_v_length = (__pyx_t_2[0]); - __pyx_t_5 = PyInt_FromSsize_t(__pyx_7genexpr__pyx_v_length); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 569, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_t_5))) __PYX_ERR(1, 569, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - } - } /* exit inner scope */ - __pyx_t_5 = PyList_AsTuple(((PyObject*)__pyx_t_1)); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 569, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_5; - __pyx_t_5 = 0; - goto __pyx_L0; - - /* "View.MemoryView":567 - * return self.obj - * - * @property # <<<<<<<<<<<<<< - * def shape(self): - * return tuple([length for length in self.view.shape[:self.view.ndim]]) - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_5); - __Pyx_AddTraceback("View.MemoryView.memoryview.shape.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":571 - * return tuple([length for length in self.view.shape[:self.view.ndim]]) - * - * @property # <<<<<<<<<<<<<< - * def strides(self): - * if self.view.strides == NULL: - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_7strides_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_7strides_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_15View_dot_MemoryView_10memoryview_7strides___get__(((struct __pyx_memoryview_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_7strides___get__(struct __pyx_memoryview_obj *__pyx_v_self) { - Py_ssize_t __pyx_8genexpr1__pyx_v_stride; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - Py_ssize_t *__pyx_t_3; - Py_ssize_t *__pyx_t_4; - Py_ssize_t *__pyx_t_5; - PyObject *__pyx_t_6 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 0); - - /* "View.MemoryView":573 - * @property - * def strides(self): - * if self.view.strides == NULL: # <<<<<<<<<<<<<< - * - * raise ValueError, "Buffer view does not expose strides" - */ - __pyx_t_1 = (__pyx_v_self->view.strides == NULL); - if (unlikely(__pyx_t_1)) { - - /* "View.MemoryView":575 - * if self.view.strides == NULL: - * - * raise ValueError, "Buffer view does not expose strides" # <<<<<<<<<<<<<< - * - * return tuple([stride for stride in self.view.strides[:self.view.ndim]]) - */ - __Pyx_Raise(__pyx_builtin_ValueError, __pyx_kp_s_Buffer_view_does_not_expose_stri, 0, 0); - __PYX_ERR(1, 575, __pyx_L1_error) - - /* "View.MemoryView":573 - * @property - * def strides(self): - * if self.view.strides == NULL: # <<<<<<<<<<<<<< - * - * raise ValueError, "Buffer view does not expose strides" - */ - } - - /* "View.MemoryView":577 - * raise ValueError, "Buffer view does not expose strides" - * - * return tuple([stride for stride in self.view.strides[:self.view.ndim]]) # <<<<<<<<<<<<<< - * - * @property - */ - __Pyx_XDECREF(__pyx_r); - { /* enter inner scope */ - __pyx_t_2 = PyList_New(0); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 577, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_4 = (__pyx_v_self->view.strides + __pyx_v_self->view.ndim); - for (__pyx_t_5 = __pyx_v_self->view.strides; __pyx_t_5 < __pyx_t_4; __pyx_t_5++) { - __pyx_t_3 = __pyx_t_5; - __pyx_8genexpr1__pyx_v_stride = (__pyx_t_3[0]); - __pyx_t_6 = PyInt_FromSsize_t(__pyx_8genexpr1__pyx_v_stride); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 577, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - if (unlikely(__Pyx_ListComp_Append(__pyx_t_2, (PyObject*)__pyx_t_6))) __PYX_ERR(1, 577, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - } - } /* exit inner scope */ - __pyx_t_6 = PyList_AsTuple(((PyObject*)__pyx_t_2)); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 577, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_r = __pyx_t_6; - __pyx_t_6 = 0; - goto __pyx_L0; - - /* "View.MemoryView":571 - * return tuple([length for length in self.view.shape[:self.view.ndim]]) - * - * @property # <<<<<<<<<<<<<< - * def strides(self): - * if self.view.strides == NULL: - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_AddTraceback("View.MemoryView.memoryview.strides.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":579 - * return tuple([stride for stride in self.view.strides[:self.view.ndim]]) - * - * @property # <<<<<<<<<<<<<< - * def suboffsets(self): - * if self.view.suboffsets == NULL: - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_10suboffsets_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_10suboffsets_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_15View_dot_MemoryView_10memoryview_10suboffsets___get__(((struct __pyx_memoryview_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_10suboffsets___get__(struct __pyx_memoryview_obj *__pyx_v_self) { - Py_ssize_t __pyx_8genexpr2__pyx_v_suboffset; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - Py_ssize_t *__pyx_t_3; - Py_ssize_t *__pyx_t_4; - Py_ssize_t *__pyx_t_5; - PyObject *__pyx_t_6 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 0); - - /* "View.MemoryView":581 - * @property - * def suboffsets(self): - * if self.view.suboffsets == NULL: # <<<<<<<<<<<<<< - * return (-1,) * self.view.ndim - * - */ - __pyx_t_1 = (__pyx_v_self->view.suboffsets == NULL); - if (__pyx_t_1) { - - /* "View.MemoryView":582 - * def suboffsets(self): - * if self.view.suboffsets == NULL: - * return (-1,) * self.view.ndim # <<<<<<<<<<<<<< - * - * return tuple([suboffset for suboffset in self.view.suboffsets[:self.view.ndim]]) - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = __Pyx_PySequence_Multiply(__pyx_tuple__4, __pyx_v_self->view.ndim); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 582, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "View.MemoryView":581 - * @property - * def suboffsets(self): - * if self.view.suboffsets == NULL: # <<<<<<<<<<<<<< - * return (-1,) * self.view.ndim - * - */ - } - - /* "View.MemoryView":584 - * return (-1,) * self.view.ndim - * - * return tuple([suboffset for suboffset in self.view.suboffsets[:self.view.ndim]]) # <<<<<<<<<<<<<< - * - * @property - */ - __Pyx_XDECREF(__pyx_r); - { /* enter inner scope */ - __pyx_t_2 = PyList_New(0); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 584, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_4 = (__pyx_v_self->view.suboffsets + __pyx_v_self->view.ndim); - for (__pyx_t_5 = __pyx_v_self->view.suboffsets; __pyx_t_5 < __pyx_t_4; __pyx_t_5++) { - __pyx_t_3 = __pyx_t_5; - __pyx_8genexpr2__pyx_v_suboffset = (__pyx_t_3[0]); - __pyx_t_6 = PyInt_FromSsize_t(__pyx_8genexpr2__pyx_v_suboffset); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 584, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - if (unlikely(__Pyx_ListComp_Append(__pyx_t_2, (PyObject*)__pyx_t_6))) __PYX_ERR(1, 584, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - } - } /* exit inner scope */ - __pyx_t_6 = PyList_AsTuple(((PyObject*)__pyx_t_2)); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 584, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_r = __pyx_t_6; - __pyx_t_6 = 0; - goto __pyx_L0; - - /* "View.MemoryView":579 - * return tuple([stride for stride in self.view.strides[:self.view.ndim]]) - * - * @property # <<<<<<<<<<<<<< - * def suboffsets(self): - * if self.view.suboffsets == NULL: - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_AddTraceback("View.MemoryView.memoryview.suboffsets.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":586 - * return tuple([suboffset for suboffset in self.view.suboffsets[:self.view.ndim]]) - * - * @property # <<<<<<<<<<<<<< - * def ndim(self): - * return self.view.ndim - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_4ndim_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_4ndim_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_15View_dot_MemoryView_10memoryview_4ndim___get__(((struct __pyx_memoryview_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4ndim___get__(struct __pyx_memoryview_obj *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 0); - - /* "View.MemoryView":588 - * @property - * def ndim(self): - * return self.view.ndim # <<<<<<<<<<<<<< - * - * @property - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->view.ndim); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 588, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "View.MemoryView":586 - * return tuple([suboffset for suboffset in self.view.suboffsets[:self.view.ndim]]) - * - * @property # <<<<<<<<<<<<<< - * def ndim(self): - * return self.view.ndim - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("View.MemoryView.memoryview.ndim.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":590 - * return self.view.ndim - * - * @property # <<<<<<<<<<<<<< - * def itemsize(self): - * return self.view.itemsize - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_8itemsize_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_8itemsize_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_15View_dot_MemoryView_10memoryview_8itemsize___get__(((struct __pyx_memoryview_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_8itemsize___get__(struct __pyx_memoryview_obj *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 0); - - /* "View.MemoryView":592 - * @property - * def itemsize(self): - * return self.view.itemsize # <<<<<<<<<<<<<< - * - * @property - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyInt_FromSsize_t(__pyx_v_self->view.itemsize); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 592, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "View.MemoryView":590 - * return self.view.ndim - * - * @property # <<<<<<<<<<<<<< - * def itemsize(self): - * return self.view.itemsize - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("View.MemoryView.memoryview.itemsize.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":594 - * return self.view.itemsize - * - * @property # <<<<<<<<<<<<<< - * def nbytes(self): - * return self.size * self.view.itemsize - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_6nbytes_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_6nbytes_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_15View_dot_MemoryView_10memoryview_6nbytes___get__(((struct __pyx_memoryview_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_6nbytes___get__(struct __pyx_memoryview_obj *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 0); - - /* "View.MemoryView":596 - * @property - * def nbytes(self): - * return self.size * self.view.itemsize # <<<<<<<<<<<<<< - * - * @property - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_size); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 596, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyInt_FromSsize_t(__pyx_v_self->view.itemsize); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 596, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyNumber_Multiply(__pyx_t_1, __pyx_t_2); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 596, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - goto __pyx_L0; - - /* "View.MemoryView":594 - * return self.view.itemsize - * - * @property # <<<<<<<<<<<<<< - * def nbytes(self): - * return self.size * self.view.itemsize - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("View.MemoryView.memoryview.nbytes.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":598 - * return self.size * self.view.itemsize - * - * @property # <<<<<<<<<<<<<< - * def size(self): - * if self._size is None: - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_4size_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_15View_dot_MemoryView_10memoryview_4size_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_15View_dot_MemoryView_10memoryview_4size___get__(((struct __pyx_memoryview_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4size___get__(struct __pyx_memoryview_obj *__pyx_v_self) { - PyObject *__pyx_v_result = NULL; - PyObject *__pyx_v_length = NULL; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - Py_ssize_t *__pyx_t_2; - Py_ssize_t *__pyx_t_3; - Py_ssize_t *__pyx_t_4; - PyObject *__pyx_t_5 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 0); - - /* "View.MemoryView":600 - * @property - * def size(self): - * if self._size is None: # <<<<<<<<<<<<<< - * result = 1 - * - */ - __pyx_t_1 = (__pyx_v_self->_size == Py_None); - if (__pyx_t_1) { - - /* "View.MemoryView":601 - * def size(self): - * if self._size is None: - * result = 1 # <<<<<<<<<<<<<< - * - * for length in self.view.shape[:self.view.ndim]: - */ - __Pyx_INCREF(__pyx_int_1); - __pyx_v_result = __pyx_int_1; - - /* "View.MemoryView":603 - * result = 1 - * - * for length in self.view.shape[:self.view.ndim]: # <<<<<<<<<<<<<< - * result *= length - * - */ - __pyx_t_3 = (__pyx_v_self->view.shape + __pyx_v_self->view.ndim); - for (__pyx_t_4 = __pyx_v_self->view.shape; __pyx_t_4 < __pyx_t_3; __pyx_t_4++) { - __pyx_t_2 = __pyx_t_4; - __pyx_t_5 = PyInt_FromSsize_t((__pyx_t_2[0])); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 603, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - __Pyx_XDECREF_SET(__pyx_v_length, __pyx_t_5); - __pyx_t_5 = 0; - - /* "View.MemoryView":604 - * - * for length in self.view.shape[:self.view.ndim]: - * result *= length # <<<<<<<<<<<<<< - * - * self._size = result - */ - __pyx_t_5 = PyNumber_InPlaceMultiply(__pyx_v_result, __pyx_v_length); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 604, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF_SET(__pyx_v_result, __pyx_t_5); - __pyx_t_5 = 0; - } - - /* "View.MemoryView":606 - * result *= length - * - * self._size = result # <<<<<<<<<<<<<< - * - * return self._size - */ - __Pyx_INCREF(__pyx_v_result); - __Pyx_GIVEREF(__pyx_v_result); - __Pyx_GOTREF(__pyx_v_self->_size); - __Pyx_DECREF(__pyx_v_self->_size); - __pyx_v_self->_size = __pyx_v_result; - - /* "View.MemoryView":600 - * @property - * def size(self): - * if self._size is None: # <<<<<<<<<<<<<< - * result = 1 - * - */ - } - - /* "View.MemoryView":608 - * self._size = result - * - * return self._size # <<<<<<<<<<<<<< - * - * def __len__(self): - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_self->_size); - __pyx_r = __pyx_v_self->_size; - goto __pyx_L0; - - /* "View.MemoryView":598 - * return self.size * self.view.itemsize - * - * @property # <<<<<<<<<<<<<< - * def size(self): - * if self._size is None: - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_5); - __Pyx_AddTraceback("View.MemoryView.memoryview.size.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_result); - __Pyx_XDECREF(__pyx_v_length); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":610 - * return self._size - * - * def __len__(self): # <<<<<<<<<<<<<< - * if self.view.ndim >= 1: - * return self.view.shape[0] - */ - -/* Python wrapper */ -static Py_ssize_t __pyx_memoryview___len__(PyObject *__pyx_v_self); /*proto*/ -static Py_ssize_t __pyx_memoryview___len__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - Py_ssize_t __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__len__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_10__len__(((struct __pyx_memoryview_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static Py_ssize_t __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_10__len__(struct __pyx_memoryview_obj *__pyx_v_self) { - Py_ssize_t __pyx_r; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - __Pyx_RefNannySetupContext("__len__", 0); - - /* "View.MemoryView":611 - * - * def __len__(self): - * if self.view.ndim >= 1: # <<<<<<<<<<<<<< - * return self.view.shape[0] - * - */ - __pyx_t_1 = (__pyx_v_self->view.ndim >= 1); - if (__pyx_t_1) { - - /* "View.MemoryView":612 - * def __len__(self): - * if self.view.ndim >= 1: - * return self.view.shape[0] # <<<<<<<<<<<<<< - * - * return 0 - */ - __pyx_r = (__pyx_v_self->view.shape[0]); - goto __pyx_L0; - - /* "View.MemoryView":611 - * - * def __len__(self): - * if self.view.ndim >= 1: # <<<<<<<<<<<<<< - * return self.view.shape[0] - * - */ - } - - /* "View.MemoryView":614 - * return self.view.shape[0] - * - * return 0 # <<<<<<<<<<<<<< - * - * def __repr__(self): - */ - __pyx_r = 0; - goto __pyx_L0; - - /* "View.MemoryView":610 - * return self._size - * - * def __len__(self): # <<<<<<<<<<<<<< - * if self.view.ndim >= 1: - * return self.view.shape[0] - */ - - /* function exit code */ - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":616 - * return 0 - * - * def __repr__(self): # <<<<<<<<<<<<<< - * return "" % (self.base.__class__.__name__, - * id(self)) - */ - -/* Python wrapper */ -static PyObject *__pyx_memoryview___repr__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_memoryview___repr__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__repr__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_12__repr__(((struct __pyx_memoryview_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_12__repr__(struct __pyx_memoryview_obj *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__repr__", 0); - - /* "View.MemoryView":617 - * - * def __repr__(self): - * return "" % (self.base.__class__.__name__, # <<<<<<<<<<<<<< - * id(self)) - * - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_base); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 617, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_class); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 617, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_name_2); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 617, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "View.MemoryView":618 - * def __repr__(self): - * return "" % (self.base.__class__.__name__, - * id(self)) # <<<<<<<<<<<<<< - * - * def __str__(self): - */ - __pyx_t_2 = __Pyx_PyObject_CallOneArg(__pyx_builtin_id, ((PyObject *)__pyx_v_self)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 618, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - - /* "View.MemoryView":617 - * - * def __repr__(self): - * return "" % (self.base.__class__.__name__, # <<<<<<<<<<<<<< - * id(self)) - * - */ - __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 617, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_GIVEREF(__pyx_t_1); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_1)) __PYX_ERR(1, 617, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_2); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_2)) __PYX_ERR(1, 617, __pyx_L1_error); - __pyx_t_1 = 0; - __pyx_t_2 = 0; - __pyx_t_2 = __Pyx_PyString_Format(__pyx_kp_s_MemoryView_of_r_at_0x_x, __pyx_t_3); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 617, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "View.MemoryView":616 - * return 0 - * - * def __repr__(self): # <<<<<<<<<<<<<< - * return "" % (self.base.__class__.__name__, - * id(self)) - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("View.MemoryView.memoryview.__repr__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":620 - * id(self)) - * - * def __str__(self): # <<<<<<<<<<<<<< - * return "" % (self.base.__class__.__name__,) - * - */ - -/* Python wrapper */ -static PyObject *__pyx_memoryview___str__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_memoryview___str__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__str__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_14__str__(((struct __pyx_memoryview_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_14__str__(struct __pyx_memoryview_obj *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__str__", 0); - - /* "View.MemoryView":621 - * - * def __str__(self): - * return "" % (self.base.__class__.__name__,) # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_base); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 621, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_class); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 621, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_name_2); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 621, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 621, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_GIVEREF(__pyx_t_1); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_1)) __PYX_ERR(1, 621, __pyx_L1_error); - __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyString_Format(__pyx_kp_s_MemoryView_of_r_object, __pyx_t_2); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 621, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "View.MemoryView":620 - * id(self)) - * - * def __str__(self): # <<<<<<<<<<<<<< - * return "" % (self.base.__class__.__name__,) - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("View.MemoryView.memoryview.__str__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":624 - * - * - * def is_c_contig(self): # <<<<<<<<<<<<<< - * cdef __Pyx_memviewslice *mslice - * cdef __Pyx_memviewslice tmp - */ - -/* Python wrapper */ -static PyObject *__pyx_memoryview_is_c_contig(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyObject *__pyx_memoryview_is_c_contig(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("is_c_contig (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 624, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("is_c_contig", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "is_c_contig", 0))) return NULL; - goto __pyx_L4_argument_unpacking_done; - goto __pyx_L3_error; - __pyx_L3_error:; - __Pyx_AddTraceback("View.MemoryView.memoryview.is_c_contig", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_16is_c_contig(((struct __pyx_memoryview_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_16is_c_contig(struct __pyx_memoryview_obj *__pyx_v_self) { - __Pyx_memviewslice *__pyx_v_mslice; - __Pyx_memviewslice __pyx_v_tmp; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - __Pyx_memviewslice *__pyx_t_1; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("is_c_contig", 0); - - /* "View.MemoryView":627 - * cdef __Pyx_memviewslice *mslice - * cdef __Pyx_memviewslice tmp - * mslice = get_slice_from_memview(self, &tmp) # <<<<<<<<<<<<<< - * return slice_is_contig(mslice[0], 'C', self.view.ndim) - * - */ - __pyx_t_1 = __pyx_memoryview_get_slice_from_memoryview(__pyx_v_self, (&__pyx_v_tmp)); if (unlikely(__pyx_t_1 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(1, 627, __pyx_L1_error) - __pyx_v_mslice = __pyx_t_1; - - /* "View.MemoryView":628 - * cdef __Pyx_memviewslice tmp - * mslice = get_slice_from_memview(self, &tmp) - * return slice_is_contig(mslice[0], 'C', self.view.ndim) # <<<<<<<<<<<<<< - * - * def is_f_contig(self): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_memviewslice_is_contig((__pyx_v_mslice[0]), 'C', __pyx_v_self->view.ndim)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 628, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "View.MemoryView":624 - * - * - * def is_c_contig(self): # <<<<<<<<<<<<<< - * cdef __Pyx_memviewslice *mslice - * cdef __Pyx_memviewslice tmp - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("View.MemoryView.memoryview.is_c_contig", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":630 - * return slice_is_contig(mslice[0], 'C', self.view.ndim) - * - * def is_f_contig(self): # <<<<<<<<<<<<<< - * cdef __Pyx_memviewslice *mslice - * cdef __Pyx_memviewslice tmp - */ - -/* Python wrapper */ -static PyObject *__pyx_memoryview_is_f_contig(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyObject *__pyx_memoryview_is_f_contig(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("is_f_contig (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 630, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("is_f_contig", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "is_f_contig", 0))) return NULL; - goto __pyx_L4_argument_unpacking_done; - goto __pyx_L3_error; - __pyx_L3_error:; - __Pyx_AddTraceback("View.MemoryView.memoryview.is_f_contig", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_18is_f_contig(((struct __pyx_memoryview_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_18is_f_contig(struct __pyx_memoryview_obj *__pyx_v_self) { - __Pyx_memviewslice *__pyx_v_mslice; - __Pyx_memviewslice __pyx_v_tmp; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - __Pyx_memviewslice *__pyx_t_1; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("is_f_contig", 0); - - /* "View.MemoryView":633 - * cdef __Pyx_memviewslice *mslice - * cdef __Pyx_memviewslice tmp - * mslice = get_slice_from_memview(self, &tmp) # <<<<<<<<<<<<<< - * return slice_is_contig(mslice[0], 'F', self.view.ndim) - * - */ - __pyx_t_1 = __pyx_memoryview_get_slice_from_memoryview(__pyx_v_self, (&__pyx_v_tmp)); if (unlikely(__pyx_t_1 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(1, 633, __pyx_L1_error) - __pyx_v_mslice = __pyx_t_1; - - /* "View.MemoryView":634 - * cdef __Pyx_memviewslice tmp - * mslice = get_slice_from_memview(self, &tmp) - * return slice_is_contig(mslice[0], 'F', self.view.ndim) # <<<<<<<<<<<<<< - * - * def copy(self): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_memviewslice_is_contig((__pyx_v_mslice[0]), 'F', __pyx_v_self->view.ndim)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 634, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "View.MemoryView":630 - * return slice_is_contig(mslice[0], 'C', self.view.ndim) - * - * def is_f_contig(self): # <<<<<<<<<<<<<< - * cdef __Pyx_memviewslice *mslice - * cdef __Pyx_memviewslice tmp - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("View.MemoryView.memoryview.is_f_contig", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":636 - * return slice_is_contig(mslice[0], 'F', self.view.ndim) - * - * def copy(self): # <<<<<<<<<<<<<< - * cdef __Pyx_memviewslice mslice - * cdef int flags = self.flags & ~PyBUF_F_CONTIGUOUS - */ - -/* Python wrapper */ -static PyObject *__pyx_memoryview_copy(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyObject *__pyx_memoryview_copy(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("copy (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 636, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("copy", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "copy", 0))) return NULL; - goto __pyx_L4_argument_unpacking_done; - goto __pyx_L3_error; - __pyx_L3_error:; - __Pyx_AddTraceback("View.MemoryView.memoryview.copy", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_20copy(((struct __pyx_memoryview_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_20copy(struct __pyx_memoryview_obj *__pyx_v_self) { - __Pyx_memviewslice __pyx_v_mslice; - int __pyx_v_flags; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - __Pyx_memviewslice __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("copy", 0); - - /* "View.MemoryView":638 - * def copy(self): - * cdef __Pyx_memviewslice mslice - * cdef int flags = self.flags & ~PyBUF_F_CONTIGUOUS # <<<<<<<<<<<<<< - * - * slice_copy(self, &mslice) - */ - __pyx_v_flags = (__pyx_v_self->flags & (~PyBUF_F_CONTIGUOUS)); - - /* "View.MemoryView":640 - * cdef int flags = self.flags & ~PyBUF_F_CONTIGUOUS - * - * slice_copy(self, &mslice) # <<<<<<<<<<<<<< - * mslice = slice_copy_contig(&mslice, "c", self.view.ndim, - * self.view.itemsize, - */ - __pyx_memoryview_slice_copy(__pyx_v_self, (&__pyx_v_mslice)); - - /* "View.MemoryView":641 - * - * slice_copy(self, &mslice) - * mslice = slice_copy_contig(&mslice, "c", self.view.ndim, # <<<<<<<<<<<<<< - * self.view.itemsize, - * flags|PyBUF_C_CONTIGUOUS, - */ - __pyx_t_1 = __pyx_memoryview_copy_new_contig((&__pyx_v_mslice), ((char *)"c"), __pyx_v_self->view.ndim, __pyx_v_self->view.itemsize, (__pyx_v_flags | PyBUF_C_CONTIGUOUS), __pyx_v_self->dtype_is_object); if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 641, __pyx_L1_error) - __pyx_v_mslice = __pyx_t_1; - - /* "View.MemoryView":646 - * self.dtype_is_object) - * - * return memoryview_copy_from_slice(self, &mslice) # <<<<<<<<<<<<<< - * - * def copy_fortran(self): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = __pyx_memoryview_copy_object_from_slice(__pyx_v_self, (&__pyx_v_mslice)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 646, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "View.MemoryView":636 - * return slice_is_contig(mslice[0], 'F', self.view.ndim) - * - * def copy(self): # <<<<<<<<<<<<<< - * cdef __Pyx_memviewslice mslice - * cdef int flags = self.flags & ~PyBUF_F_CONTIGUOUS - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("View.MemoryView.memoryview.copy", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":648 - * return memoryview_copy_from_slice(self, &mslice) - * - * def copy_fortran(self): # <<<<<<<<<<<<<< - * cdef __Pyx_memviewslice src, dst - * cdef int flags = self.flags & ~PyBUF_C_CONTIGUOUS - */ - -/* Python wrapper */ -static PyObject *__pyx_memoryview_copy_fortran(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyObject *__pyx_memoryview_copy_fortran(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("copy_fortran (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 648, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("copy_fortran", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "copy_fortran", 0))) return NULL; - goto __pyx_L4_argument_unpacking_done; - goto __pyx_L3_error; - __pyx_L3_error:; - __Pyx_AddTraceback("View.MemoryView.memoryview.copy_fortran", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_22copy_fortran(((struct __pyx_memoryview_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_22copy_fortran(struct __pyx_memoryview_obj *__pyx_v_self) { - __Pyx_memviewslice __pyx_v_src; - __Pyx_memviewslice __pyx_v_dst; - int __pyx_v_flags; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - __Pyx_memviewslice __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("copy_fortran", 0); - - /* "View.MemoryView":650 - * def copy_fortran(self): - * cdef __Pyx_memviewslice src, dst - * cdef int flags = self.flags & ~PyBUF_C_CONTIGUOUS # <<<<<<<<<<<<<< - * - * slice_copy(self, &src) - */ - __pyx_v_flags = (__pyx_v_self->flags & (~PyBUF_C_CONTIGUOUS)); - - /* "View.MemoryView":652 - * cdef int flags = self.flags & ~PyBUF_C_CONTIGUOUS - * - * slice_copy(self, &src) # <<<<<<<<<<<<<< - * dst = slice_copy_contig(&src, "fortran", self.view.ndim, - * self.view.itemsize, - */ - __pyx_memoryview_slice_copy(__pyx_v_self, (&__pyx_v_src)); - - /* "View.MemoryView":653 - * - * slice_copy(self, &src) - * dst = slice_copy_contig(&src, "fortran", self.view.ndim, # <<<<<<<<<<<<<< - * self.view.itemsize, - * flags|PyBUF_F_CONTIGUOUS, - */ - __pyx_t_1 = __pyx_memoryview_copy_new_contig((&__pyx_v_src), ((char *)"fortran"), __pyx_v_self->view.ndim, __pyx_v_self->view.itemsize, (__pyx_v_flags | PyBUF_F_CONTIGUOUS), __pyx_v_self->dtype_is_object); if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 653, __pyx_L1_error) - __pyx_v_dst = __pyx_t_1; - - /* "View.MemoryView":658 - * self.dtype_is_object) - * - * return memoryview_copy_from_slice(self, &dst) # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = __pyx_memoryview_copy_object_from_slice(__pyx_v_self, (&__pyx_v_dst)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 658, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "View.MemoryView":648 - * return memoryview_copy_from_slice(self, &mslice) - * - * def copy_fortran(self): # <<<<<<<<<<<<<< - * cdef __Pyx_memviewslice src, dst - * cdef int flags = self.flags & ~PyBUF_C_CONTIGUOUS - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("View.MemoryView.memoryview.copy_fortran", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - -/* Python wrapper */ -static PyObject *__pyx_pw___pyx_memoryview_1__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyObject *__pyx_pw___pyx_memoryview_1__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__reduce_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 1, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("__reduce_cython__", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__reduce_cython__", 0))) return NULL; - goto __pyx_L4_argument_unpacking_done; - goto __pyx_L3_error; - __pyx_L3_error:; - __Pyx_AddTraceback("View.MemoryView.memoryview.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf___pyx_memoryview___reduce_cython__(((struct __pyx_memoryview_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf___pyx_memoryview___reduce_cython__(CYTHON_UNUSED struct __pyx_memoryview_obj *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__reduce_cython__", 0); - - /* "(tree fragment)":2 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<< - * def __setstate_cython__(self, __pyx_state): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); - __PYX_ERR(1, 2, __pyx_L1_error) - - /* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("View.MemoryView.memoryview.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - -/* Python wrapper */ -static PyObject *__pyx_pw___pyx_memoryview_3__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyObject *__pyx_pw___pyx_memoryview_3__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - CYTHON_UNUSED PyObject *__pyx_v___pyx_state = 0; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__setstate_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 3, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_pyx_state,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pyx_state)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 3, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__setstate_cython__") < 0)) __PYX_ERR(1, 3, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 1)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - } - __pyx_v___pyx_state = values[0]; - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__setstate_cython__", 1, 1, 1, __pyx_nargs); __PYX_ERR(1, 3, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("View.MemoryView.memoryview.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf___pyx_memoryview_2__setstate_cython__(((struct __pyx_memoryview_obj *)__pyx_v_self), __pyx_v___pyx_state); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf___pyx_memoryview_2__setstate_cython__(CYTHON_UNUSED struct __pyx_memoryview_obj *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__setstate_cython__", 0); - - /* "(tree fragment)":4 - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<< - */ - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); - __PYX_ERR(1, 4, __pyx_L1_error) - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("View.MemoryView.memoryview.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":662 - * - * @cname('__pyx_memoryview_new') - * cdef memoryview_cwrapper(object o, int flags, bint dtype_is_object, __Pyx_TypeInfo *typeinfo): # <<<<<<<<<<<<<< - * cdef memoryview result = memoryview(o, flags, dtype_is_object) - * result.typeinfo = typeinfo - */ - -static PyObject *__pyx_memoryview_new(PyObject *__pyx_v_o, int __pyx_v_flags, int __pyx_v_dtype_is_object, __Pyx_TypeInfo *__pyx_v_typeinfo) { - struct __pyx_memoryview_obj *__pyx_v_result = 0; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("memoryview_cwrapper", 0); - - /* "View.MemoryView":663 - * @cname('__pyx_memoryview_new') - * cdef memoryview_cwrapper(object o, int flags, bint dtype_is_object, __Pyx_TypeInfo *typeinfo): - * cdef memoryview result = memoryview(o, flags, dtype_is_object) # <<<<<<<<<<<<<< - * result.typeinfo = typeinfo - * return result - */ - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_flags); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 663, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_v_dtype_is_object); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 663, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 663, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_INCREF(__pyx_v_o); - __Pyx_GIVEREF(__pyx_v_o); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_o)) __PYX_ERR(1, 663, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_1); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_1)) __PYX_ERR(1, 663, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_2); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_t_2)) __PYX_ERR(1, 663, __pyx_L1_error); - __pyx_t_1 = 0; - __pyx_t_2 = 0; - __pyx_t_2 = __Pyx_PyObject_Call(((PyObject *)__pyx_memoryview_type), __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 663, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_v_result = ((struct __pyx_memoryview_obj *)__pyx_t_2); - __pyx_t_2 = 0; - - /* "View.MemoryView":664 - * cdef memoryview_cwrapper(object o, int flags, bint dtype_is_object, __Pyx_TypeInfo *typeinfo): - * cdef memoryview result = memoryview(o, flags, dtype_is_object) - * result.typeinfo = typeinfo # <<<<<<<<<<<<<< - * return result - * - */ - __pyx_v_result->typeinfo = __pyx_v_typeinfo; - - /* "View.MemoryView":665 - * cdef memoryview result = memoryview(o, flags, dtype_is_object) - * result.typeinfo = typeinfo - * return result # <<<<<<<<<<<<<< - * - * @cname('__pyx_memoryview_check') - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF((PyObject *)__pyx_v_result); - __pyx_r = ((PyObject *)__pyx_v_result); - goto __pyx_L0; - - /* "View.MemoryView":662 - * - * @cname('__pyx_memoryview_new') - * cdef memoryview_cwrapper(object o, int flags, bint dtype_is_object, __Pyx_TypeInfo *typeinfo): # <<<<<<<<<<<<<< - * cdef memoryview result = memoryview(o, flags, dtype_is_object) - * result.typeinfo = typeinfo - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("View.MemoryView.memoryview_cwrapper", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF((PyObject *)__pyx_v_result); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":668 - * - * @cname('__pyx_memoryview_check') - * cdef inline bint memoryview_check(object o) noexcept: # <<<<<<<<<<<<<< - * return isinstance(o, memoryview) - * - */ - -static CYTHON_INLINE int __pyx_memoryview_check(PyObject *__pyx_v_o) { - int __pyx_r; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - __Pyx_RefNannySetupContext("memoryview_check", 0); - - /* "View.MemoryView":669 - * @cname('__pyx_memoryview_check') - * cdef inline bint memoryview_check(object o) noexcept: - * return isinstance(o, memoryview) # <<<<<<<<<<<<<< - * - * cdef tuple _unellipsify(object index, int ndim): - */ - __pyx_t_1 = __Pyx_TypeCheck(__pyx_v_o, __pyx_memoryview_type); - __pyx_r = __pyx_t_1; - goto __pyx_L0; - - /* "View.MemoryView":668 - * - * @cname('__pyx_memoryview_check') - * cdef inline bint memoryview_check(object o) noexcept: # <<<<<<<<<<<<<< - * return isinstance(o, memoryview) - * - */ - - /* function exit code */ - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":671 - * return isinstance(o, memoryview) - * - * cdef tuple _unellipsify(object index, int ndim): # <<<<<<<<<<<<<< - * """ - * Replace all ellipses with full slices and fill incomplete indices with - */ - -static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { - Py_ssize_t __pyx_v_idx; - PyObject *__pyx_v_tup = NULL; - PyObject *__pyx_v_result = NULL; - int __pyx_v_have_slices; - int __pyx_v_seen_ellipsis; - PyObject *__pyx_v_item = NULL; - Py_ssize_t __pyx_v_nslices; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - Py_ssize_t __pyx_t_4; - Py_ssize_t __pyx_t_5; - Py_UCS4 __pyx_t_6; - PyObject *__pyx_t_7 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("_unellipsify", 0); - - /* "View.MemoryView":677 - * """ - * cdef Py_ssize_t idx - * tup = index if isinstance(index, tuple) else (index,) # <<<<<<<<<<<<<< - * - * result = [slice(None)] * ndim - */ - __pyx_t_2 = PyTuple_Check(__pyx_v_index); - if (__pyx_t_2) { - __Pyx_INCREF(((PyObject*)__pyx_v_index)); - __pyx_t_1 = __pyx_v_index; - } else { - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 677, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_INCREF(__pyx_v_index); - __Pyx_GIVEREF(__pyx_v_index); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_index)) __PYX_ERR(1, 677, __pyx_L1_error); - __pyx_t_1 = __pyx_t_3; - __pyx_t_3 = 0; - } - __pyx_v_tup = ((PyObject*)__pyx_t_1); - __pyx_t_1 = 0; - - /* "View.MemoryView":679 - * tup = index if isinstance(index, tuple) else (index,) - * - * result = [slice(None)] * ndim # <<<<<<<<<<<<<< - * have_slices = False - * seen_ellipsis = False - */ - __pyx_t_1 = PyList_New(1 * ((__pyx_v_ndim<0) ? 0:__pyx_v_ndim)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 679, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - { Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < __pyx_v_ndim; __pyx_temp++) { - __Pyx_INCREF(__pyx_slice__5); - __Pyx_GIVEREF(__pyx_slice__5); - if (__Pyx_PyList_SET_ITEM(__pyx_t_1, __pyx_temp, __pyx_slice__5)) __PYX_ERR(1, 679, __pyx_L1_error); - } - } - __pyx_v_result = ((PyObject*)__pyx_t_1); - __pyx_t_1 = 0; - - /* "View.MemoryView":680 - * - * result = [slice(None)] * ndim - * have_slices = False # <<<<<<<<<<<<<< - * seen_ellipsis = False - * idx = 0 - */ - __pyx_v_have_slices = 0; - - /* "View.MemoryView":681 - * result = [slice(None)] * ndim - * have_slices = False - * seen_ellipsis = False # <<<<<<<<<<<<<< - * idx = 0 - * for item in tup: - */ - __pyx_v_seen_ellipsis = 0; - - /* "View.MemoryView":682 - * have_slices = False - * seen_ellipsis = False - * idx = 0 # <<<<<<<<<<<<<< - * for item in tup: - * if item is Ellipsis: - */ - __pyx_v_idx = 0; - - /* "View.MemoryView":683 - * seen_ellipsis = False - * idx = 0 - * for item in tup: # <<<<<<<<<<<<<< - * if item is Ellipsis: - * if not seen_ellipsis: - */ - if (unlikely(__pyx_v_tup == Py_None)) { - PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); - __PYX_ERR(1, 683, __pyx_L1_error) - } - __pyx_t_1 = __pyx_v_tup; __Pyx_INCREF(__pyx_t_1); __pyx_t_4 = 0; - for (;;) { - if (__pyx_t_4 >= PyTuple_GET_SIZE(__pyx_t_1)) break; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_4); __Pyx_INCREF(__pyx_t_3); __pyx_t_4++; if (unlikely((0 < 0))) __PYX_ERR(1, 683, __pyx_L1_error) - #else - __pyx_t_3 = PySequence_ITEM(__pyx_t_1, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 683, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - #endif - __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_3); - __pyx_t_3 = 0; - - /* "View.MemoryView":684 - * idx = 0 - * for item in tup: - * if item is Ellipsis: # <<<<<<<<<<<<<< - * if not seen_ellipsis: - * idx += ndim - len(tup) - */ - __pyx_t_2 = (__pyx_v_item == __pyx_builtin_Ellipsis); - if (__pyx_t_2) { - - /* "View.MemoryView":685 - * for item in tup: - * if item is Ellipsis: - * if not seen_ellipsis: # <<<<<<<<<<<<<< - * idx += ndim - len(tup) - * seen_ellipsis = True - */ - __pyx_t_2 = (!__pyx_v_seen_ellipsis); - if (__pyx_t_2) { - - /* "View.MemoryView":686 - * if item is Ellipsis: - * if not seen_ellipsis: - * idx += ndim - len(tup) # <<<<<<<<<<<<<< - * seen_ellipsis = True - * have_slices = True - */ - if (unlikely(__pyx_v_tup == Py_None)) { - PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); - __PYX_ERR(1, 686, __pyx_L1_error) - } - __pyx_t_5 = __Pyx_PyTuple_GET_SIZE(__pyx_v_tup); if (unlikely(__pyx_t_5 == ((Py_ssize_t)-1))) __PYX_ERR(1, 686, __pyx_L1_error) - __pyx_v_idx = (__pyx_v_idx + (__pyx_v_ndim - __pyx_t_5)); - - /* "View.MemoryView":687 - * if not seen_ellipsis: - * idx += ndim - len(tup) - * seen_ellipsis = True # <<<<<<<<<<<<<< - * have_slices = True - * else: - */ - __pyx_v_seen_ellipsis = 1; - - /* "View.MemoryView":685 - * for item in tup: - * if item is Ellipsis: - * if not seen_ellipsis: # <<<<<<<<<<<<<< - * idx += ndim - len(tup) - * seen_ellipsis = True - */ - } - - /* "View.MemoryView":688 - * idx += ndim - len(tup) - * seen_ellipsis = True - * have_slices = True # <<<<<<<<<<<<<< - * else: - * if isinstance(item, slice): - */ - __pyx_v_have_slices = 1; - - /* "View.MemoryView":684 - * idx = 0 - * for item in tup: - * if item is Ellipsis: # <<<<<<<<<<<<<< - * if not seen_ellipsis: - * idx += ndim - len(tup) - */ - goto __pyx_L5; - } - - /* "View.MemoryView":690 - * have_slices = True - * else: - * if isinstance(item, slice): # <<<<<<<<<<<<<< - * have_slices = True - * elif not PyIndex_Check(item): - */ - /*else*/ { - __pyx_t_2 = PySlice_Check(__pyx_v_item); - if (__pyx_t_2) { - - /* "View.MemoryView":691 - * else: - * if isinstance(item, slice): - * have_slices = True # <<<<<<<<<<<<<< - * elif not PyIndex_Check(item): - * raise TypeError, f"Cannot index with type '{type(item)}'" - */ - __pyx_v_have_slices = 1; - - /* "View.MemoryView":690 - * have_slices = True - * else: - * if isinstance(item, slice): # <<<<<<<<<<<<<< - * have_slices = True - * elif not PyIndex_Check(item): - */ - goto __pyx_L7; - } - - /* "View.MemoryView":692 - * if isinstance(item, slice): - * have_slices = True - * elif not PyIndex_Check(item): # <<<<<<<<<<<<<< - * raise TypeError, f"Cannot index with type '{type(item)}'" - * result[idx] = item - */ - __pyx_t_2 = (!(PyIndex_Check(__pyx_v_item) != 0)); - if (unlikely(__pyx_t_2)) { - - /* "View.MemoryView":693 - * have_slices = True - * elif not PyIndex_Check(item): - * raise TypeError, f"Cannot index with type '{type(item)}'" # <<<<<<<<<<<<<< - * result[idx] = item - * idx += 1 - */ - __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 693, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = 0; - __pyx_t_6 = 127; - __Pyx_INCREF(__pyx_kp_u_Cannot_index_with_type); - __pyx_t_5 += 24; - __Pyx_GIVEREF(__pyx_kp_u_Cannot_index_with_type); - PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_kp_u_Cannot_index_with_type); - __pyx_t_7 = __Pyx_PyObject_FormatSimple(((PyObject *)Py_TYPE(__pyx_v_item)), __pyx_empty_unicode); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 693, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_6 = (__Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_7) > __pyx_t_6) ? __Pyx_PyUnicode_MAX_CHAR_VALUE(__pyx_t_7) : __pyx_t_6; - __pyx_t_5 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_7); - __Pyx_GIVEREF(__pyx_t_7); - PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_7); - __pyx_t_7 = 0; - __Pyx_INCREF(__pyx_kp_u__6); - __pyx_t_5 += 1; - __Pyx_GIVEREF(__pyx_kp_u__6); - PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_kp_u__6); - __pyx_t_7 = __Pyx_PyUnicode_Join(__pyx_t_3, 3, __pyx_t_5, __pyx_t_6); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 693, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_t_7, 0, 0); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __PYX_ERR(1, 693, __pyx_L1_error) - - /* "View.MemoryView":692 - * if isinstance(item, slice): - * have_slices = True - * elif not PyIndex_Check(item): # <<<<<<<<<<<<<< - * raise TypeError, f"Cannot index with type '{type(item)}'" - * result[idx] = item - */ - } - __pyx_L7:; - - /* "View.MemoryView":694 - * elif not PyIndex_Check(item): - * raise TypeError, f"Cannot index with type '{type(item)}'" - * result[idx] = item # <<<<<<<<<<<<<< - * idx += 1 - * - */ - if (unlikely((__Pyx_SetItemInt(__pyx_v_result, __pyx_v_idx, __pyx_v_item, Py_ssize_t, 1, PyInt_FromSsize_t, 1, 1, 1) < 0))) __PYX_ERR(1, 694, __pyx_L1_error) - } - __pyx_L5:; - - /* "View.MemoryView":695 - * raise TypeError, f"Cannot index with type '{type(item)}'" - * result[idx] = item - * idx += 1 # <<<<<<<<<<<<<< - * - * nslices = ndim - idx - */ - __pyx_v_idx = (__pyx_v_idx + 1); - - /* "View.MemoryView":683 - * seen_ellipsis = False - * idx = 0 - * for item in tup: # <<<<<<<<<<<<<< - * if item is Ellipsis: - * if not seen_ellipsis: - */ - } - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "View.MemoryView":697 - * idx += 1 - * - * nslices = ndim - idx # <<<<<<<<<<<<<< - * return have_slices or nslices, tuple(result) - * - */ - __pyx_v_nslices = (__pyx_v_ndim - __pyx_v_idx); - - /* "View.MemoryView":698 - * - * nslices = ndim - idx - * return have_slices or nslices, tuple(result) # <<<<<<<<<<<<<< - * - * cdef int assert_direct_dimensions(Py_ssize_t *suboffsets, int ndim) except -1: - */ - __Pyx_XDECREF(__pyx_r); - if (!__pyx_v_have_slices) { - } else { - __pyx_t_7 = __Pyx_PyBool_FromLong(__pyx_v_have_slices); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 698, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_1 = __pyx_t_7; - __pyx_t_7 = 0; - goto __pyx_L9_bool_binop_done; - } - __pyx_t_7 = PyInt_FromSsize_t(__pyx_v_nslices); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 698, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_1 = __pyx_t_7; - __pyx_t_7 = 0; - __pyx_L9_bool_binop_done:; - __pyx_t_7 = PyList_AsTuple(__pyx_v_result); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 698, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 698, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_GIVEREF(__pyx_t_1); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_1)) __PYX_ERR(1, 698, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_7); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_7)) __PYX_ERR(1, 698, __pyx_L1_error); - __pyx_t_1 = 0; - __pyx_t_7 = 0; - __pyx_r = ((PyObject*)__pyx_t_3); - __pyx_t_3 = 0; - goto __pyx_L0; - - /* "View.MemoryView":671 - * return isinstance(o, memoryview) - * - * cdef tuple _unellipsify(object index, int ndim): # <<<<<<<<<<<<<< - * """ - * Replace all ellipses with full slices and fill incomplete indices with - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_AddTraceback("View.MemoryView._unellipsify", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_tup); - __Pyx_XDECREF(__pyx_v_result); - __Pyx_XDECREF(__pyx_v_item); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":700 - * return have_slices or nslices, tuple(result) - * - * cdef int assert_direct_dimensions(Py_ssize_t *suboffsets, int ndim) except -1: # <<<<<<<<<<<<<< - * for suboffset in suboffsets[:ndim]: - * if suboffset >= 0: - */ - -static int assert_direct_dimensions(Py_ssize_t *__pyx_v_suboffsets, int __pyx_v_ndim) { - Py_ssize_t __pyx_v_suboffset; - int __pyx_r; - __Pyx_RefNannyDeclarations - Py_ssize_t *__pyx_t_1; - Py_ssize_t *__pyx_t_2; - Py_ssize_t *__pyx_t_3; - int __pyx_t_4; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("assert_direct_dimensions", 0); - - /* "View.MemoryView":701 - * - * cdef int assert_direct_dimensions(Py_ssize_t *suboffsets, int ndim) except -1: - * for suboffset in suboffsets[:ndim]: # <<<<<<<<<<<<<< - * if suboffset >= 0: - * raise ValueError, "Indirect dimensions not supported" - */ - __pyx_t_2 = (__pyx_v_suboffsets + __pyx_v_ndim); - for (__pyx_t_3 = __pyx_v_suboffsets; __pyx_t_3 < __pyx_t_2; __pyx_t_3++) { - __pyx_t_1 = __pyx_t_3; - __pyx_v_suboffset = (__pyx_t_1[0]); - - /* "View.MemoryView":702 - * cdef int assert_direct_dimensions(Py_ssize_t *suboffsets, int ndim) except -1: - * for suboffset in suboffsets[:ndim]: - * if suboffset >= 0: # <<<<<<<<<<<<<< - * raise ValueError, "Indirect dimensions not supported" - * return 0 # return type just used as an error flag - */ - __pyx_t_4 = (__pyx_v_suboffset >= 0); - if (unlikely(__pyx_t_4)) { - - /* "View.MemoryView":703 - * for suboffset in suboffsets[:ndim]: - * if suboffset >= 0: - * raise ValueError, "Indirect dimensions not supported" # <<<<<<<<<<<<<< - * return 0 # return type just used as an error flag - * - */ - __Pyx_Raise(__pyx_builtin_ValueError, __pyx_kp_s_Indirect_dimensions_not_supporte, 0, 0); - __PYX_ERR(1, 703, __pyx_L1_error) - - /* "View.MemoryView":702 - * cdef int assert_direct_dimensions(Py_ssize_t *suboffsets, int ndim) except -1: - * for suboffset in suboffsets[:ndim]: - * if suboffset >= 0: # <<<<<<<<<<<<<< - * raise ValueError, "Indirect dimensions not supported" - * return 0 # return type just used as an error flag - */ - } - } - - /* "View.MemoryView":704 - * if suboffset >= 0: - * raise ValueError, "Indirect dimensions not supported" - * return 0 # return type just used as an error flag # <<<<<<<<<<<<<< - * - * - */ - __pyx_r = 0; - goto __pyx_L0; - - /* "View.MemoryView":700 - * return have_slices or nslices, tuple(result) - * - * cdef int assert_direct_dimensions(Py_ssize_t *suboffsets, int ndim) except -1: # <<<<<<<<<<<<<< - * for suboffset in suboffsets[:ndim]: - * if suboffset >= 0: - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("View.MemoryView.assert_direct_dimensions", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":711 - * - * @cname('__pyx_memview_slice') - * cdef memoryview memview_slice(memoryview memview, object indices): # <<<<<<<<<<<<<< - * cdef int new_ndim = 0, suboffset_dim = -1, dim - * cdef bint negative_step - */ - -static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_obj *__pyx_v_memview, PyObject *__pyx_v_indices) { - int __pyx_v_new_ndim; - int __pyx_v_suboffset_dim; - int __pyx_v_dim; - __Pyx_memviewslice __pyx_v_src; - __Pyx_memviewslice __pyx_v_dst; - __Pyx_memviewslice *__pyx_v_p_src; - struct __pyx_memoryviewslice_obj *__pyx_v_memviewsliceobj = 0; - __Pyx_memviewslice *__pyx_v_p_dst; - int *__pyx_v_p_suboffset_dim; - Py_ssize_t __pyx_v_start; - Py_ssize_t __pyx_v_stop; - Py_ssize_t __pyx_v_step; - Py_ssize_t __pyx_v_cindex; - int __pyx_v_have_start; - int __pyx_v_have_stop; - int __pyx_v_have_step; - PyObject *__pyx_v_index = NULL; - struct __pyx_memoryview_obj *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - struct __pyx_memoryview_obj *__pyx_t_3; - char *__pyx_t_4; - int __pyx_t_5; - Py_ssize_t __pyx_t_6; - PyObject *(*__pyx_t_7)(PyObject *); - PyObject *__pyx_t_8 = NULL; - Py_ssize_t __pyx_t_9; - int __pyx_t_10; - Py_ssize_t __pyx_t_11; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("memview_slice", 0); - - /* "View.MemoryView":712 - * @cname('__pyx_memview_slice') - * cdef memoryview memview_slice(memoryview memview, object indices): - * cdef int new_ndim = 0, suboffset_dim = -1, dim # <<<<<<<<<<<<<< - * cdef bint negative_step - * cdef __Pyx_memviewslice src, dst - */ - __pyx_v_new_ndim = 0; - __pyx_v_suboffset_dim = -1; - - /* "View.MemoryView":719 - * - * - * memset(&dst, 0, sizeof(dst)) # <<<<<<<<<<<<<< - * - * cdef _memoryviewslice memviewsliceobj - */ - (void)(memset((&__pyx_v_dst), 0, (sizeof(__pyx_v_dst)))); - - /* "View.MemoryView":723 - * cdef _memoryviewslice memviewsliceobj - * - * assert memview.view.ndim > 0 # <<<<<<<<<<<<<< - * - * if isinstance(memview, _memoryviewslice): - */ - #ifndef CYTHON_WITHOUT_ASSERTIONS - if (unlikely(__pyx_assertions_enabled())) { - __pyx_t_1 = (__pyx_v_memview->view.ndim > 0); - if (unlikely(!__pyx_t_1)) { - __Pyx_Raise(__pyx_builtin_AssertionError, 0, 0, 0); - __PYX_ERR(1, 723, __pyx_L1_error) - } - } - #else - if ((1)); else __PYX_ERR(1, 723, __pyx_L1_error) - #endif - - /* "View.MemoryView":725 - * assert memview.view.ndim > 0 - * - * if isinstance(memview, _memoryviewslice): # <<<<<<<<<<<<<< - * memviewsliceobj = memview - * p_src = &memviewsliceobj.from_slice - */ - __pyx_t_1 = __Pyx_TypeCheck(((PyObject *)__pyx_v_memview), __pyx_memoryviewslice_type); - if (__pyx_t_1) { - - /* "View.MemoryView":726 - * - * if isinstance(memview, _memoryviewslice): - * memviewsliceobj = memview # <<<<<<<<<<<<<< - * p_src = &memviewsliceobj.from_slice - * else: - */ - if (!(likely(((((PyObject *)__pyx_v_memview)) == Py_None) || likely(__Pyx_TypeTest(((PyObject *)__pyx_v_memview), __pyx_memoryviewslice_type))))) __PYX_ERR(1, 726, __pyx_L1_error) - __pyx_t_2 = ((PyObject *)__pyx_v_memview); - __Pyx_INCREF(__pyx_t_2); - __pyx_v_memviewsliceobj = ((struct __pyx_memoryviewslice_obj *)__pyx_t_2); - __pyx_t_2 = 0; - - /* "View.MemoryView":727 - * if isinstance(memview, _memoryviewslice): - * memviewsliceobj = memview - * p_src = &memviewsliceobj.from_slice # <<<<<<<<<<<<<< - * else: - * slice_copy(memview, &src) - */ - __pyx_v_p_src = (&__pyx_v_memviewsliceobj->from_slice); - - /* "View.MemoryView":725 - * assert memview.view.ndim > 0 - * - * if isinstance(memview, _memoryviewslice): # <<<<<<<<<<<<<< - * memviewsliceobj = memview - * p_src = &memviewsliceobj.from_slice - */ - goto __pyx_L3; - } - - /* "View.MemoryView":729 - * p_src = &memviewsliceobj.from_slice - * else: - * slice_copy(memview, &src) # <<<<<<<<<<<<<< - * p_src = &src - * - */ - /*else*/ { - __pyx_memoryview_slice_copy(__pyx_v_memview, (&__pyx_v_src)); - - /* "View.MemoryView":730 - * else: - * slice_copy(memview, &src) - * p_src = &src # <<<<<<<<<<<<<< - * - * - */ - __pyx_v_p_src = (&__pyx_v_src); - } - __pyx_L3:; - - /* "View.MemoryView":736 - * - * - * dst.memview = p_src.memview # <<<<<<<<<<<<<< - * dst.data = p_src.data - * - */ - __pyx_t_3 = __pyx_v_p_src->memview; - __pyx_v_dst.memview = __pyx_t_3; - - /* "View.MemoryView":737 - * - * dst.memview = p_src.memview - * dst.data = p_src.data # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_4 = __pyx_v_p_src->data; - __pyx_v_dst.data = __pyx_t_4; - - /* "View.MemoryView":742 - * - * - * cdef __Pyx_memviewslice *p_dst = &dst # <<<<<<<<<<<<<< - * cdef int *p_suboffset_dim = &suboffset_dim - * cdef Py_ssize_t start, stop, step, cindex - */ - __pyx_v_p_dst = (&__pyx_v_dst); - - /* "View.MemoryView":743 - * - * cdef __Pyx_memviewslice *p_dst = &dst - * cdef int *p_suboffset_dim = &suboffset_dim # <<<<<<<<<<<<<< - * cdef Py_ssize_t start, stop, step, cindex - * cdef bint have_start, have_stop, have_step - */ - __pyx_v_p_suboffset_dim = (&__pyx_v_suboffset_dim); - - /* "View.MemoryView":747 - * cdef bint have_start, have_stop, have_step - * - * for dim, index in enumerate(indices): # <<<<<<<<<<<<<< - * if PyIndex_Check(index): - * cindex = index - */ - __pyx_t_5 = 0; - if (likely(PyList_CheckExact(__pyx_v_indices)) || PyTuple_CheckExact(__pyx_v_indices)) { - __pyx_t_2 = __pyx_v_indices; __Pyx_INCREF(__pyx_t_2); __pyx_t_6 = 0; - __pyx_t_7 = NULL; - } else { - __pyx_t_6 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_v_indices); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 747, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_7 = __Pyx_PyObject_GetIterNextFunc(__pyx_t_2); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 747, __pyx_L1_error) - } - for (;;) { - if (likely(!__pyx_t_7)) { - if (likely(PyList_CheckExact(__pyx_t_2))) { - if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_2)) break; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely((0 < 0))) __PYX_ERR(1, 747, __pyx_L1_error) - #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_2, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 747, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_8); - #endif - } else { - if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_2)) break; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely((0 < 0))) __PYX_ERR(1, 747, __pyx_L1_error) - #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_2, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 747, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_8); - #endif - } - } else { - __pyx_t_8 = __pyx_t_7(__pyx_t_2); - if (unlikely(!__pyx_t_8)) { - PyObject* exc_type = PyErr_Occurred(); - if (exc_type) { - if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(1, 747, __pyx_L1_error) - } - break; - } - __Pyx_GOTREF(__pyx_t_8); - } - __Pyx_XDECREF_SET(__pyx_v_index, __pyx_t_8); - __pyx_t_8 = 0; - __pyx_v_dim = __pyx_t_5; - __pyx_t_5 = (__pyx_t_5 + 1); - - /* "View.MemoryView":748 - * - * for dim, index in enumerate(indices): - * if PyIndex_Check(index): # <<<<<<<<<<<<<< - * cindex = index - * slice_memviewslice( - */ - __pyx_t_1 = (PyIndex_Check(__pyx_v_index) != 0); - if (__pyx_t_1) { - - /* "View.MemoryView":749 - * for dim, index in enumerate(indices): - * if PyIndex_Check(index): - * cindex = index # <<<<<<<<<<<<<< - * slice_memviewslice( - * p_dst, p_src.shape[dim], p_src.strides[dim], p_src.suboffsets[dim], - */ - __pyx_t_9 = __Pyx_PyIndex_AsSsize_t(__pyx_v_index); if (unlikely((__pyx_t_9 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 749, __pyx_L1_error) - __pyx_v_cindex = __pyx_t_9; - - /* "View.MemoryView":750 - * if PyIndex_Check(index): - * cindex = index - * slice_memviewslice( # <<<<<<<<<<<<<< - * p_dst, p_src.shape[dim], p_src.strides[dim], p_src.suboffsets[dim], - * dim, new_ndim, p_suboffset_dim, - */ - __pyx_t_10 = __pyx_memoryview_slice_memviewslice(__pyx_v_p_dst, (__pyx_v_p_src->shape[__pyx_v_dim]), (__pyx_v_p_src->strides[__pyx_v_dim]), (__pyx_v_p_src->suboffsets[__pyx_v_dim]), __pyx_v_dim, __pyx_v_new_ndim, __pyx_v_p_suboffset_dim, __pyx_v_cindex, 0, 0, 0, 0, 0, 0); if (unlikely(__pyx_t_10 == ((int)-1))) __PYX_ERR(1, 750, __pyx_L1_error) - - /* "View.MemoryView":748 - * - * for dim, index in enumerate(indices): - * if PyIndex_Check(index): # <<<<<<<<<<<<<< - * cindex = index - * slice_memviewslice( - */ - goto __pyx_L6; - } - - /* "View.MemoryView":756 - * 0, 0, 0, # have_{start,stop,step} - * False) - * elif index is None: # <<<<<<<<<<<<<< - * p_dst.shape[new_ndim] = 1 - * p_dst.strides[new_ndim] = 0 - */ - __pyx_t_1 = (__pyx_v_index == Py_None); - if (__pyx_t_1) { - - /* "View.MemoryView":757 - * False) - * elif index is None: - * p_dst.shape[new_ndim] = 1 # <<<<<<<<<<<<<< - * p_dst.strides[new_ndim] = 0 - * p_dst.suboffsets[new_ndim] = -1 - */ - (__pyx_v_p_dst->shape[__pyx_v_new_ndim]) = 1; - - /* "View.MemoryView":758 - * elif index is None: - * p_dst.shape[new_ndim] = 1 - * p_dst.strides[new_ndim] = 0 # <<<<<<<<<<<<<< - * p_dst.suboffsets[new_ndim] = -1 - * new_ndim += 1 - */ - (__pyx_v_p_dst->strides[__pyx_v_new_ndim]) = 0; - - /* "View.MemoryView":759 - * p_dst.shape[new_ndim] = 1 - * p_dst.strides[new_ndim] = 0 - * p_dst.suboffsets[new_ndim] = -1 # <<<<<<<<<<<<<< - * new_ndim += 1 - * else: - */ - (__pyx_v_p_dst->suboffsets[__pyx_v_new_ndim]) = -1L; - - /* "View.MemoryView":760 - * p_dst.strides[new_ndim] = 0 - * p_dst.suboffsets[new_ndim] = -1 - * new_ndim += 1 # <<<<<<<<<<<<<< - * else: - * start = index.start or 0 - */ - __pyx_v_new_ndim = (__pyx_v_new_ndim + 1); - - /* "View.MemoryView":756 - * 0, 0, 0, # have_{start,stop,step} - * False) - * elif index is None: # <<<<<<<<<<<<<< - * p_dst.shape[new_ndim] = 1 - * p_dst.strides[new_ndim] = 0 - */ - goto __pyx_L6; - } - - /* "View.MemoryView":762 - * new_ndim += 1 - * else: - * start = index.start or 0 # <<<<<<<<<<<<<< - * stop = index.stop or 0 - * step = index.step or 0 - */ - /*else*/ { - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_start); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 762, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_8); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(1, 762, __pyx_L1_error) - if (!__pyx_t_1) { - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - } else { - __pyx_t_11 = __Pyx_PyIndex_AsSsize_t(__pyx_t_8); if (unlikely((__pyx_t_11 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 762, __pyx_L1_error) - __pyx_t_9 = __pyx_t_11; - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - goto __pyx_L7_bool_binop_done; - } - __pyx_t_9 = 0; - __pyx_L7_bool_binop_done:; - __pyx_v_start = __pyx_t_9; - - /* "View.MemoryView":763 - * else: - * start = index.start or 0 - * stop = index.stop or 0 # <<<<<<<<<<<<<< - * step = index.step or 0 - * - */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_stop); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 763, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_8); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(1, 763, __pyx_L1_error) - if (!__pyx_t_1) { - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - } else { - __pyx_t_11 = __Pyx_PyIndex_AsSsize_t(__pyx_t_8); if (unlikely((__pyx_t_11 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 763, __pyx_L1_error) - __pyx_t_9 = __pyx_t_11; - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - goto __pyx_L9_bool_binop_done; - } - __pyx_t_9 = 0; - __pyx_L9_bool_binop_done:; - __pyx_v_stop = __pyx_t_9; - - /* "View.MemoryView":764 - * start = index.start or 0 - * stop = index.stop or 0 - * step = index.step or 0 # <<<<<<<<<<<<<< - * - * have_start = index.start is not None - */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_step); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 764, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_8); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(1, 764, __pyx_L1_error) - if (!__pyx_t_1) { - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - } else { - __pyx_t_11 = __Pyx_PyIndex_AsSsize_t(__pyx_t_8); if (unlikely((__pyx_t_11 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 764, __pyx_L1_error) - __pyx_t_9 = __pyx_t_11; - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - goto __pyx_L11_bool_binop_done; - } - __pyx_t_9 = 0; - __pyx_L11_bool_binop_done:; - __pyx_v_step = __pyx_t_9; - - /* "View.MemoryView":766 - * step = index.step or 0 - * - * have_start = index.start is not None # <<<<<<<<<<<<<< - * have_stop = index.stop is not None - * have_step = index.step is not None - */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_start); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 766, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_1 = (__pyx_t_8 != Py_None); - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_v_have_start = __pyx_t_1; - - /* "View.MemoryView":767 - * - * have_start = index.start is not None - * have_stop = index.stop is not None # <<<<<<<<<<<<<< - * have_step = index.step is not None - * - */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_stop); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 767, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_1 = (__pyx_t_8 != Py_None); - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_v_have_stop = __pyx_t_1; - - /* "View.MemoryView":768 - * have_start = index.start is not None - * have_stop = index.stop is not None - * have_step = index.step is not None # <<<<<<<<<<<<<< - * - * slice_memviewslice( - */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_step); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 768, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_1 = (__pyx_t_8 != Py_None); - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_v_have_step = __pyx_t_1; - - /* "View.MemoryView":770 - * have_step = index.step is not None - * - * slice_memviewslice( # <<<<<<<<<<<<<< - * p_dst, p_src.shape[dim], p_src.strides[dim], p_src.suboffsets[dim], - * dim, new_ndim, p_suboffset_dim, - */ - __pyx_t_10 = __pyx_memoryview_slice_memviewslice(__pyx_v_p_dst, (__pyx_v_p_src->shape[__pyx_v_dim]), (__pyx_v_p_src->strides[__pyx_v_dim]), (__pyx_v_p_src->suboffsets[__pyx_v_dim]), __pyx_v_dim, __pyx_v_new_ndim, __pyx_v_p_suboffset_dim, __pyx_v_start, __pyx_v_stop, __pyx_v_step, __pyx_v_have_start, __pyx_v_have_stop, __pyx_v_have_step, 1); if (unlikely(__pyx_t_10 == ((int)-1))) __PYX_ERR(1, 770, __pyx_L1_error) - - /* "View.MemoryView":776 - * have_start, have_stop, have_step, - * True) - * new_ndim += 1 # <<<<<<<<<<<<<< - * - * if isinstance(memview, _memoryviewslice): - */ - __pyx_v_new_ndim = (__pyx_v_new_ndim + 1); - } - __pyx_L6:; - - /* "View.MemoryView":747 - * cdef bint have_start, have_stop, have_step - * - * for dim, index in enumerate(indices): # <<<<<<<<<<<<<< - * if PyIndex_Check(index): - * cindex = index - */ - } - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "View.MemoryView":778 - * new_ndim += 1 - * - * if isinstance(memview, _memoryviewslice): # <<<<<<<<<<<<<< - * return memoryview_fromslice(dst, new_ndim, - * memviewsliceobj.to_object_func, - */ - __pyx_t_1 = __Pyx_TypeCheck(((PyObject *)__pyx_v_memview), __pyx_memoryviewslice_type); - if (__pyx_t_1) { - - /* "View.MemoryView":779 - * - * if isinstance(memview, _memoryviewslice): - * return memoryview_fromslice(dst, new_ndim, # <<<<<<<<<<<<<< - * memviewsliceobj.to_object_func, - * memviewsliceobj.to_dtype_func, - */ - __Pyx_XDECREF((PyObject *)__pyx_r); - - /* "View.MemoryView":780 - * if isinstance(memview, _memoryviewslice): - * return memoryview_fromslice(dst, new_ndim, - * memviewsliceobj.to_object_func, # <<<<<<<<<<<<<< - * memviewsliceobj.to_dtype_func, - * memview.dtype_is_object) - */ - if (unlikely(!__pyx_v_memviewsliceobj)) { __Pyx_RaiseUnboundLocalError("memviewsliceobj"); __PYX_ERR(1, 780, __pyx_L1_error) } - - /* "View.MemoryView":781 - * return memoryview_fromslice(dst, new_ndim, - * memviewsliceobj.to_object_func, - * memviewsliceobj.to_dtype_func, # <<<<<<<<<<<<<< - * memview.dtype_is_object) - * else: - */ - if (unlikely(!__pyx_v_memviewsliceobj)) { __Pyx_RaiseUnboundLocalError("memviewsliceobj"); __PYX_ERR(1, 781, __pyx_L1_error) } - - /* "View.MemoryView":779 - * - * if isinstance(memview, _memoryviewslice): - * return memoryview_fromslice(dst, new_ndim, # <<<<<<<<<<<<<< - * memviewsliceobj.to_object_func, - * memviewsliceobj.to_dtype_func, - */ - __pyx_t_2 = __pyx_memoryview_fromslice(__pyx_v_dst, __pyx_v_new_ndim, __pyx_v_memviewsliceobj->to_object_func, __pyx_v_memviewsliceobj->to_dtype_func, __pyx_v_memview->dtype_is_object); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 779, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_memoryview_type))))) __PYX_ERR(1, 779, __pyx_L1_error) - __pyx_r = ((struct __pyx_memoryview_obj *)__pyx_t_2); - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "View.MemoryView":778 - * new_ndim += 1 - * - * if isinstance(memview, _memoryviewslice): # <<<<<<<<<<<<<< - * return memoryview_fromslice(dst, new_ndim, - * memviewsliceobj.to_object_func, - */ - } - - /* "View.MemoryView":784 - * memview.dtype_is_object) - * else: - * return memoryview_fromslice(dst, new_ndim, NULL, NULL, # <<<<<<<<<<<<<< - * memview.dtype_is_object) - * - */ - /*else*/ { - __Pyx_XDECREF((PyObject *)__pyx_r); - - /* "View.MemoryView":785 - * else: - * return memoryview_fromslice(dst, new_ndim, NULL, NULL, - * memview.dtype_is_object) # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_2 = __pyx_memoryview_fromslice(__pyx_v_dst, __pyx_v_new_ndim, NULL, NULL, __pyx_v_memview->dtype_is_object); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 784, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - - /* "View.MemoryView":784 - * memview.dtype_is_object) - * else: - * return memoryview_fromslice(dst, new_ndim, NULL, NULL, # <<<<<<<<<<<<<< - * memview.dtype_is_object) - * - */ - if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_memoryview_type))))) __PYX_ERR(1, 784, __pyx_L1_error) - __pyx_r = ((struct __pyx_memoryview_obj *)__pyx_t_2); - __pyx_t_2 = 0; - goto __pyx_L0; - } - - /* "View.MemoryView":711 - * - * @cname('__pyx_memview_slice') - * cdef memoryview memview_slice(memoryview memview, object indices): # <<<<<<<<<<<<<< - * cdef int new_ndim = 0, suboffset_dim = -1, dim - * cdef bint negative_step - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_8); - __Pyx_AddTraceback("View.MemoryView.memview_slice", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF((PyObject *)__pyx_v_memviewsliceobj); - __Pyx_XDECREF(__pyx_v_index); - __Pyx_XGIVEREF((PyObject *)__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":793 - * - * @cname('__pyx_memoryview_slice_memviewslice') - * cdef int slice_memviewslice( # <<<<<<<<<<<<<< - * __Pyx_memviewslice *dst, - * Py_ssize_t shape, Py_ssize_t stride, Py_ssize_t suboffset, - */ - -static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, Py_ssize_t __pyx_v_shape, Py_ssize_t __pyx_v_stride, Py_ssize_t __pyx_v_suboffset, int __pyx_v_dim, int __pyx_v_new_ndim, int *__pyx_v_suboffset_dim, Py_ssize_t __pyx_v_start, Py_ssize_t __pyx_v_stop, Py_ssize_t __pyx_v_step, int __pyx_v_have_start, int __pyx_v_have_stop, int __pyx_v_have_step, int __pyx_v_is_slice) { - Py_ssize_t __pyx_v_new_shape; - int __pyx_v_negative_step; - int __pyx_r; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - #ifdef WITH_THREAD - PyGILState_STATE __pyx_gilstate_save; - #endif - __Pyx_RefNannySetupContext("slice_memviewslice", 1); - - /* "View.MemoryView":813 - * cdef bint negative_step - * - * if not is_slice: # <<<<<<<<<<<<<< - * - * if start < 0: - */ - __pyx_t_1 = (!__pyx_v_is_slice); - if (__pyx_t_1) { - - /* "View.MemoryView":815 - * if not is_slice: - * - * if start < 0: # <<<<<<<<<<<<<< - * start += shape - * if not 0 <= start < shape: - */ - __pyx_t_1 = (__pyx_v_start < 0); - if (__pyx_t_1) { - - /* "View.MemoryView":816 - * - * if start < 0: - * start += shape # <<<<<<<<<<<<<< - * if not 0 <= start < shape: - * _err_dim(PyExc_IndexError, "Index out of bounds (axis %d)", dim) - */ - __pyx_v_start = (__pyx_v_start + __pyx_v_shape); - - /* "View.MemoryView":815 - * if not is_slice: - * - * if start < 0: # <<<<<<<<<<<<<< - * start += shape - * if not 0 <= start < shape: - */ - } - - /* "View.MemoryView":817 - * if start < 0: - * start += shape - * if not 0 <= start < shape: # <<<<<<<<<<<<<< - * _err_dim(PyExc_IndexError, "Index out of bounds (axis %d)", dim) - * else: - */ - __pyx_t_1 = (0 <= __pyx_v_start); - if (__pyx_t_1) { - __pyx_t_1 = (__pyx_v_start < __pyx_v_shape); - } - __pyx_t_2 = (!__pyx_t_1); - if (__pyx_t_2) { - - /* "View.MemoryView":818 - * start += shape - * if not 0 <= start < shape: - * _err_dim(PyExc_IndexError, "Index out of bounds (axis %d)", dim) # <<<<<<<<<<<<<< - * else: - * - */ - __pyx_t_3 = __pyx_memoryview_err_dim(PyExc_IndexError, __pyx_kp_s_Index_out_of_bounds_axis_d, __pyx_v_dim); if (unlikely(__pyx_t_3 == ((int)-1))) __PYX_ERR(1, 818, __pyx_L1_error) - - /* "View.MemoryView":817 - * if start < 0: - * start += shape - * if not 0 <= start < shape: # <<<<<<<<<<<<<< - * _err_dim(PyExc_IndexError, "Index out of bounds (axis %d)", dim) - * else: - */ - } - - /* "View.MemoryView":813 - * cdef bint negative_step - * - * if not is_slice: # <<<<<<<<<<<<<< - * - * if start < 0: - */ - goto __pyx_L3; - } - - /* "View.MemoryView":821 - * else: - * - * if have_step: # <<<<<<<<<<<<<< - * negative_step = step < 0 - * if step == 0: - */ - /*else*/ { - __pyx_t_2 = (__pyx_v_have_step != 0); - if (__pyx_t_2) { - - /* "View.MemoryView":822 - * - * if have_step: - * negative_step = step < 0 # <<<<<<<<<<<<<< - * if step == 0: - * _err_dim(PyExc_ValueError, "Step may not be zero (axis %d)", dim) - */ - __pyx_v_negative_step = (__pyx_v_step < 0); - - /* "View.MemoryView":823 - * if have_step: - * negative_step = step < 0 - * if step == 0: # <<<<<<<<<<<<<< - * _err_dim(PyExc_ValueError, "Step may not be zero (axis %d)", dim) - * else: - */ - __pyx_t_2 = (__pyx_v_step == 0); - if (__pyx_t_2) { - - /* "View.MemoryView":824 - * negative_step = step < 0 - * if step == 0: - * _err_dim(PyExc_ValueError, "Step may not be zero (axis %d)", dim) # <<<<<<<<<<<<<< - * else: - * negative_step = False - */ - __pyx_t_3 = __pyx_memoryview_err_dim(PyExc_ValueError, __pyx_kp_s_Step_may_not_be_zero_axis_d, __pyx_v_dim); if (unlikely(__pyx_t_3 == ((int)-1))) __PYX_ERR(1, 824, __pyx_L1_error) - - /* "View.MemoryView":823 - * if have_step: - * negative_step = step < 0 - * if step == 0: # <<<<<<<<<<<<<< - * _err_dim(PyExc_ValueError, "Step may not be zero (axis %d)", dim) - * else: - */ - } - - /* "View.MemoryView":821 - * else: - * - * if have_step: # <<<<<<<<<<<<<< - * negative_step = step < 0 - * if step == 0: - */ - goto __pyx_L6; - } - - /* "View.MemoryView":826 - * _err_dim(PyExc_ValueError, "Step may not be zero (axis %d)", dim) - * else: - * negative_step = False # <<<<<<<<<<<<<< - * step = 1 - * - */ - /*else*/ { - __pyx_v_negative_step = 0; - - /* "View.MemoryView":827 - * else: - * negative_step = False - * step = 1 # <<<<<<<<<<<<<< - * - * - */ - __pyx_v_step = 1; - } - __pyx_L6:; - - /* "View.MemoryView":830 - * - * - * if have_start: # <<<<<<<<<<<<<< - * if start < 0: - * start += shape - */ - __pyx_t_2 = (__pyx_v_have_start != 0); - if (__pyx_t_2) { - - /* "View.MemoryView":831 - * - * if have_start: - * if start < 0: # <<<<<<<<<<<<<< - * start += shape - * if start < 0: - */ - __pyx_t_2 = (__pyx_v_start < 0); - if (__pyx_t_2) { - - /* "View.MemoryView":832 - * if have_start: - * if start < 0: - * start += shape # <<<<<<<<<<<<<< - * if start < 0: - * start = 0 - */ - __pyx_v_start = (__pyx_v_start + __pyx_v_shape); - - /* "View.MemoryView":833 - * if start < 0: - * start += shape - * if start < 0: # <<<<<<<<<<<<<< - * start = 0 - * elif start >= shape: - */ - __pyx_t_2 = (__pyx_v_start < 0); - if (__pyx_t_2) { - - /* "View.MemoryView":834 - * start += shape - * if start < 0: - * start = 0 # <<<<<<<<<<<<<< - * elif start >= shape: - * if negative_step: - */ - __pyx_v_start = 0; - - /* "View.MemoryView":833 - * if start < 0: - * start += shape - * if start < 0: # <<<<<<<<<<<<<< - * start = 0 - * elif start >= shape: - */ - } - - /* "View.MemoryView":831 - * - * if have_start: - * if start < 0: # <<<<<<<<<<<<<< - * start += shape - * if start < 0: - */ - goto __pyx_L9; - } - - /* "View.MemoryView":835 - * if start < 0: - * start = 0 - * elif start >= shape: # <<<<<<<<<<<<<< - * if negative_step: - * start = shape - 1 - */ - __pyx_t_2 = (__pyx_v_start >= __pyx_v_shape); - if (__pyx_t_2) { - - /* "View.MemoryView":836 - * start = 0 - * elif start >= shape: - * if negative_step: # <<<<<<<<<<<<<< - * start = shape - 1 - * else: - */ - if (__pyx_v_negative_step) { - - /* "View.MemoryView":837 - * elif start >= shape: - * if negative_step: - * start = shape - 1 # <<<<<<<<<<<<<< - * else: - * start = shape - */ - __pyx_v_start = (__pyx_v_shape - 1); - - /* "View.MemoryView":836 - * start = 0 - * elif start >= shape: - * if negative_step: # <<<<<<<<<<<<<< - * start = shape - 1 - * else: - */ - goto __pyx_L11; - } - - /* "View.MemoryView":839 - * start = shape - 1 - * else: - * start = shape # <<<<<<<<<<<<<< - * else: - * if negative_step: - */ - /*else*/ { - __pyx_v_start = __pyx_v_shape; - } - __pyx_L11:; - - /* "View.MemoryView":835 - * if start < 0: - * start = 0 - * elif start >= shape: # <<<<<<<<<<<<<< - * if negative_step: - * start = shape - 1 - */ - } - __pyx_L9:; - - /* "View.MemoryView":830 - * - * - * if have_start: # <<<<<<<<<<<<<< - * if start < 0: - * start += shape - */ - goto __pyx_L8; - } - - /* "View.MemoryView":841 - * start = shape - * else: - * if negative_step: # <<<<<<<<<<<<<< - * start = shape - 1 - * else: - */ - /*else*/ { - if (__pyx_v_negative_step) { - - /* "View.MemoryView":842 - * else: - * if negative_step: - * start = shape - 1 # <<<<<<<<<<<<<< - * else: - * start = 0 - */ - __pyx_v_start = (__pyx_v_shape - 1); - - /* "View.MemoryView":841 - * start = shape - * else: - * if negative_step: # <<<<<<<<<<<<<< - * start = shape - 1 - * else: - */ - goto __pyx_L12; - } - - /* "View.MemoryView":844 - * start = shape - 1 - * else: - * start = 0 # <<<<<<<<<<<<<< - * - * if have_stop: - */ - /*else*/ { - __pyx_v_start = 0; - } - __pyx_L12:; - } - __pyx_L8:; - - /* "View.MemoryView":846 - * start = 0 - * - * if have_stop: # <<<<<<<<<<<<<< - * if stop < 0: - * stop += shape - */ - __pyx_t_2 = (__pyx_v_have_stop != 0); - if (__pyx_t_2) { - - /* "View.MemoryView":847 - * - * if have_stop: - * if stop < 0: # <<<<<<<<<<<<<< - * stop += shape - * if stop < 0: - */ - __pyx_t_2 = (__pyx_v_stop < 0); - if (__pyx_t_2) { - - /* "View.MemoryView":848 - * if have_stop: - * if stop < 0: - * stop += shape # <<<<<<<<<<<<<< - * if stop < 0: - * stop = 0 - */ - __pyx_v_stop = (__pyx_v_stop + __pyx_v_shape); - - /* "View.MemoryView":849 - * if stop < 0: - * stop += shape - * if stop < 0: # <<<<<<<<<<<<<< - * stop = 0 - * elif stop > shape: - */ - __pyx_t_2 = (__pyx_v_stop < 0); - if (__pyx_t_2) { - - /* "View.MemoryView":850 - * stop += shape - * if stop < 0: - * stop = 0 # <<<<<<<<<<<<<< - * elif stop > shape: - * stop = shape - */ - __pyx_v_stop = 0; - - /* "View.MemoryView":849 - * if stop < 0: - * stop += shape - * if stop < 0: # <<<<<<<<<<<<<< - * stop = 0 - * elif stop > shape: - */ - } - - /* "View.MemoryView":847 - * - * if have_stop: - * if stop < 0: # <<<<<<<<<<<<<< - * stop += shape - * if stop < 0: - */ - goto __pyx_L14; - } - - /* "View.MemoryView":851 - * if stop < 0: - * stop = 0 - * elif stop > shape: # <<<<<<<<<<<<<< - * stop = shape - * else: - */ - __pyx_t_2 = (__pyx_v_stop > __pyx_v_shape); - if (__pyx_t_2) { - - /* "View.MemoryView":852 - * stop = 0 - * elif stop > shape: - * stop = shape # <<<<<<<<<<<<<< - * else: - * if negative_step: - */ - __pyx_v_stop = __pyx_v_shape; - - /* "View.MemoryView":851 - * if stop < 0: - * stop = 0 - * elif stop > shape: # <<<<<<<<<<<<<< - * stop = shape - * else: - */ - } - __pyx_L14:; - - /* "View.MemoryView":846 - * start = 0 - * - * if have_stop: # <<<<<<<<<<<<<< - * if stop < 0: - * stop += shape - */ - goto __pyx_L13; - } - - /* "View.MemoryView":854 - * stop = shape - * else: - * if negative_step: # <<<<<<<<<<<<<< - * stop = -1 - * else: - */ - /*else*/ { - if (__pyx_v_negative_step) { - - /* "View.MemoryView":855 - * else: - * if negative_step: - * stop = -1 # <<<<<<<<<<<<<< - * else: - * stop = shape - */ - __pyx_v_stop = -1L; - - /* "View.MemoryView":854 - * stop = shape - * else: - * if negative_step: # <<<<<<<<<<<<<< - * stop = -1 - * else: - */ - goto __pyx_L16; - } - - /* "View.MemoryView":857 - * stop = -1 - * else: - * stop = shape # <<<<<<<<<<<<<< - * - * - */ - /*else*/ { - __pyx_v_stop = __pyx_v_shape; - } - __pyx_L16:; - } - __pyx_L13:; - - /* "View.MemoryView":861 - * - * with cython.cdivision(True): - * new_shape = (stop - start) // step # <<<<<<<<<<<<<< - * - * if (stop - start) - step * new_shape: - */ - __pyx_v_new_shape = ((__pyx_v_stop - __pyx_v_start) / __pyx_v_step); - - /* "View.MemoryView":863 - * new_shape = (stop - start) // step - * - * if (stop - start) - step * new_shape: # <<<<<<<<<<<<<< - * new_shape += 1 - * - */ - __pyx_t_2 = (((__pyx_v_stop - __pyx_v_start) - (__pyx_v_step * __pyx_v_new_shape)) != 0); - if (__pyx_t_2) { - - /* "View.MemoryView":864 - * - * if (stop - start) - step * new_shape: - * new_shape += 1 # <<<<<<<<<<<<<< - * - * if new_shape < 0: - */ - __pyx_v_new_shape = (__pyx_v_new_shape + 1); - - /* "View.MemoryView":863 - * new_shape = (stop - start) // step - * - * if (stop - start) - step * new_shape: # <<<<<<<<<<<<<< - * new_shape += 1 - * - */ - } - - /* "View.MemoryView":866 - * new_shape += 1 - * - * if new_shape < 0: # <<<<<<<<<<<<<< - * new_shape = 0 - * - */ - __pyx_t_2 = (__pyx_v_new_shape < 0); - if (__pyx_t_2) { - - /* "View.MemoryView":867 - * - * if new_shape < 0: - * new_shape = 0 # <<<<<<<<<<<<<< - * - * - */ - __pyx_v_new_shape = 0; - - /* "View.MemoryView":866 - * new_shape += 1 - * - * if new_shape < 0: # <<<<<<<<<<<<<< - * new_shape = 0 - * - */ - } - - /* "View.MemoryView":870 - * - * - * dst.strides[new_ndim] = stride * step # <<<<<<<<<<<<<< - * dst.shape[new_ndim] = new_shape - * dst.suboffsets[new_ndim] = suboffset - */ - (__pyx_v_dst->strides[__pyx_v_new_ndim]) = (__pyx_v_stride * __pyx_v_step); - - /* "View.MemoryView":871 - * - * dst.strides[new_ndim] = stride * step - * dst.shape[new_ndim] = new_shape # <<<<<<<<<<<<<< - * dst.suboffsets[new_ndim] = suboffset - * - */ - (__pyx_v_dst->shape[__pyx_v_new_ndim]) = __pyx_v_new_shape; - - /* "View.MemoryView":872 - * dst.strides[new_ndim] = stride * step - * dst.shape[new_ndim] = new_shape - * dst.suboffsets[new_ndim] = suboffset # <<<<<<<<<<<<<< - * - * - */ - (__pyx_v_dst->suboffsets[__pyx_v_new_ndim]) = __pyx_v_suboffset; - } - __pyx_L3:; - - /* "View.MemoryView":875 - * - * - * if suboffset_dim[0] < 0: # <<<<<<<<<<<<<< - * dst.data += start * stride - * else: - */ - __pyx_t_2 = ((__pyx_v_suboffset_dim[0]) < 0); - if (__pyx_t_2) { - - /* "View.MemoryView":876 - * - * if suboffset_dim[0] < 0: - * dst.data += start * stride # <<<<<<<<<<<<<< - * else: - * dst.suboffsets[suboffset_dim[0]] += start * stride - */ - __pyx_v_dst->data = (__pyx_v_dst->data + (__pyx_v_start * __pyx_v_stride)); - - /* "View.MemoryView":875 - * - * - * if suboffset_dim[0] < 0: # <<<<<<<<<<<<<< - * dst.data += start * stride - * else: - */ - goto __pyx_L19; - } - - /* "View.MemoryView":878 - * dst.data += start * stride - * else: - * dst.suboffsets[suboffset_dim[0]] += start * stride # <<<<<<<<<<<<<< - * - * if suboffset >= 0: - */ - /*else*/ { - __pyx_t_3 = (__pyx_v_suboffset_dim[0]); - (__pyx_v_dst->suboffsets[__pyx_t_3]) = ((__pyx_v_dst->suboffsets[__pyx_t_3]) + (__pyx_v_start * __pyx_v_stride)); - } - __pyx_L19:; - - /* "View.MemoryView":880 - * dst.suboffsets[suboffset_dim[0]] += start * stride - * - * if suboffset >= 0: # <<<<<<<<<<<<<< - * if not is_slice: - * if new_ndim == 0: - */ - __pyx_t_2 = (__pyx_v_suboffset >= 0); - if (__pyx_t_2) { - - /* "View.MemoryView":881 - * - * if suboffset >= 0: - * if not is_slice: # <<<<<<<<<<<<<< - * if new_ndim == 0: - * dst.data = ( dst.data)[0] + suboffset - */ - __pyx_t_2 = (!__pyx_v_is_slice); - if (__pyx_t_2) { - - /* "View.MemoryView":882 - * if suboffset >= 0: - * if not is_slice: - * if new_ndim == 0: # <<<<<<<<<<<<<< - * dst.data = ( dst.data)[0] + suboffset - * else: - */ - __pyx_t_2 = (__pyx_v_new_ndim == 0); - if (__pyx_t_2) { - - /* "View.MemoryView":883 - * if not is_slice: - * if new_ndim == 0: - * dst.data = ( dst.data)[0] + suboffset # <<<<<<<<<<<<<< - * else: - * _err_dim(PyExc_IndexError, "All dimensions preceding dimension %d " - */ - __pyx_v_dst->data = ((((char **)__pyx_v_dst->data)[0]) + __pyx_v_suboffset); - - /* "View.MemoryView":882 - * if suboffset >= 0: - * if not is_slice: - * if new_ndim == 0: # <<<<<<<<<<<<<< - * dst.data = ( dst.data)[0] + suboffset - * else: - */ - goto __pyx_L22; - } - - /* "View.MemoryView":885 - * dst.data = ( dst.data)[0] + suboffset - * else: - * _err_dim(PyExc_IndexError, "All dimensions preceding dimension %d " # <<<<<<<<<<<<<< - * "must be indexed and not sliced", dim) - * else: - */ - /*else*/ { - - /* "View.MemoryView":886 - * else: - * _err_dim(PyExc_IndexError, "All dimensions preceding dimension %d " - * "must be indexed and not sliced", dim) # <<<<<<<<<<<<<< - * else: - * suboffset_dim[0] = new_ndim - */ - __pyx_t_3 = __pyx_memoryview_err_dim(PyExc_IndexError, __pyx_kp_s_All_dimensions_preceding_dimensi, __pyx_v_dim); if (unlikely(__pyx_t_3 == ((int)-1))) __PYX_ERR(1, 885, __pyx_L1_error) - } - __pyx_L22:; - - /* "View.MemoryView":881 - * - * if suboffset >= 0: - * if not is_slice: # <<<<<<<<<<<<<< - * if new_ndim == 0: - * dst.data = ( dst.data)[0] + suboffset - */ - goto __pyx_L21; - } - - /* "View.MemoryView":888 - * "must be indexed and not sliced", dim) - * else: - * suboffset_dim[0] = new_ndim # <<<<<<<<<<<<<< - * - * return 0 - */ - /*else*/ { - (__pyx_v_suboffset_dim[0]) = __pyx_v_new_ndim; - } - __pyx_L21:; - - /* "View.MemoryView":880 - * dst.suboffsets[suboffset_dim[0]] += start * stride - * - * if suboffset >= 0: # <<<<<<<<<<<<<< - * if not is_slice: - * if new_ndim == 0: - */ - } - - /* "View.MemoryView":890 - * suboffset_dim[0] = new_ndim - * - * return 0 # <<<<<<<<<<<<<< - * - * - */ - __pyx_r = 0; - goto __pyx_L0; - - /* "View.MemoryView":793 - * - * @cname('__pyx_memoryview_slice_memviewslice') - * cdef int slice_memviewslice( # <<<<<<<<<<<<<< - * __Pyx_memviewslice *dst, - * Py_ssize_t shape, Py_ssize_t stride, Py_ssize_t suboffset, - */ - - /* function exit code */ - __pyx_L1_error:; - #ifdef WITH_THREAD - __pyx_gilstate_save = __Pyx_PyGILState_Ensure(); - #endif - __Pyx_AddTraceback("View.MemoryView.slice_memviewslice", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - #ifdef WITH_THREAD - __Pyx_PyGILState_Release(__pyx_gilstate_save); - #endif - __pyx_L0:; - __Pyx_RefNannyFinishContextNogil() - return __pyx_r; -} - -/* "View.MemoryView":896 - * - * @cname('__pyx_pybuffer_index') - * cdef char *pybuffer_index(Py_buffer *view, char *bufp, Py_ssize_t index, # <<<<<<<<<<<<<< - * Py_ssize_t dim) except NULL: - * cdef Py_ssize_t shape, stride, suboffset = -1 - */ - -static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, Py_ssize_t __pyx_v_index, Py_ssize_t __pyx_v_dim) { - Py_ssize_t __pyx_v_shape; - Py_ssize_t __pyx_v_stride; - Py_ssize_t __pyx_v_suboffset; - Py_ssize_t __pyx_v_itemsize; - char *__pyx_v_resultp; - char *__pyx_r; - __Pyx_RefNannyDeclarations - Py_ssize_t __pyx_t_1; - int __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - Py_UCS4 __pyx_t_4; - PyObject *__pyx_t_5 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("pybuffer_index", 0); - - /* "View.MemoryView":898 - * cdef char *pybuffer_index(Py_buffer *view, char *bufp, Py_ssize_t index, - * Py_ssize_t dim) except NULL: - * cdef Py_ssize_t shape, stride, suboffset = -1 # <<<<<<<<<<<<<< - * cdef Py_ssize_t itemsize = view.itemsize - * cdef char *resultp - */ - __pyx_v_suboffset = -1L; - - /* "View.MemoryView":899 - * Py_ssize_t dim) except NULL: - * cdef Py_ssize_t shape, stride, suboffset = -1 - * cdef Py_ssize_t itemsize = view.itemsize # <<<<<<<<<<<<<< - * cdef char *resultp - * - */ - __pyx_t_1 = __pyx_v_view->itemsize; - __pyx_v_itemsize = __pyx_t_1; - - /* "View.MemoryView":902 - * cdef char *resultp - * - * if view.ndim == 0: # <<<<<<<<<<<<<< - * shape = view.len // itemsize - * stride = itemsize - */ - __pyx_t_2 = (__pyx_v_view->ndim == 0); - if (__pyx_t_2) { - - /* "View.MemoryView":903 - * - * if view.ndim == 0: - * shape = view.len // itemsize # <<<<<<<<<<<<<< - * stride = itemsize - * else: - */ - if (unlikely(__pyx_v_itemsize == 0)) { - PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero"); - __PYX_ERR(1, 903, __pyx_L1_error) - } - else if (sizeof(Py_ssize_t) == sizeof(long) && (!(((Py_ssize_t)-1) > 0)) && unlikely(__pyx_v_itemsize == (Py_ssize_t)-1) && unlikely(__Pyx_UNARY_NEG_WOULD_OVERFLOW(__pyx_v_view->len))) { - PyErr_SetString(PyExc_OverflowError, "value too large to perform division"); - __PYX_ERR(1, 903, __pyx_L1_error) - } - __pyx_v_shape = __Pyx_div_Py_ssize_t(__pyx_v_view->len, __pyx_v_itemsize); - - /* "View.MemoryView":904 - * if view.ndim == 0: - * shape = view.len // itemsize - * stride = itemsize # <<<<<<<<<<<<<< - * else: - * shape = view.shape[dim] - */ - __pyx_v_stride = __pyx_v_itemsize; - - /* "View.MemoryView":902 - * cdef char *resultp - * - * if view.ndim == 0: # <<<<<<<<<<<<<< - * shape = view.len // itemsize - * stride = itemsize - */ - goto __pyx_L3; - } - - /* "View.MemoryView":906 - * stride = itemsize - * else: - * shape = view.shape[dim] # <<<<<<<<<<<<<< - * stride = view.strides[dim] - * if view.suboffsets != NULL: - */ - /*else*/ { - __pyx_v_shape = (__pyx_v_view->shape[__pyx_v_dim]); - - /* "View.MemoryView":907 - * else: - * shape = view.shape[dim] - * stride = view.strides[dim] # <<<<<<<<<<<<<< - * if view.suboffsets != NULL: - * suboffset = view.suboffsets[dim] - */ - __pyx_v_stride = (__pyx_v_view->strides[__pyx_v_dim]); - - /* "View.MemoryView":908 - * shape = view.shape[dim] - * stride = view.strides[dim] - * if view.suboffsets != NULL: # <<<<<<<<<<<<<< - * suboffset = view.suboffsets[dim] - * - */ - __pyx_t_2 = (__pyx_v_view->suboffsets != NULL); - if (__pyx_t_2) { - - /* "View.MemoryView":909 - * stride = view.strides[dim] - * if view.suboffsets != NULL: - * suboffset = view.suboffsets[dim] # <<<<<<<<<<<<<< - * - * if index < 0: - */ - __pyx_v_suboffset = (__pyx_v_view->suboffsets[__pyx_v_dim]); - - /* "View.MemoryView":908 - * shape = view.shape[dim] - * stride = view.strides[dim] - * if view.suboffsets != NULL: # <<<<<<<<<<<<<< - * suboffset = view.suboffsets[dim] - * - */ - } - } - __pyx_L3:; - - /* "View.MemoryView":911 - * suboffset = view.suboffsets[dim] - * - * if index < 0: # <<<<<<<<<<<<<< - * index += view.shape[dim] - * if index < 0: - */ - __pyx_t_2 = (__pyx_v_index < 0); - if (__pyx_t_2) { - - /* "View.MemoryView":912 - * - * if index < 0: - * index += view.shape[dim] # <<<<<<<<<<<<<< - * if index < 0: - * raise IndexError, f"Out of bounds on buffer access (axis {dim})" - */ - __pyx_v_index = (__pyx_v_index + (__pyx_v_view->shape[__pyx_v_dim])); - - /* "View.MemoryView":913 - * if index < 0: - * index += view.shape[dim] - * if index < 0: # <<<<<<<<<<<<<< - * raise IndexError, f"Out of bounds on buffer access (axis {dim})" - * - */ - __pyx_t_2 = (__pyx_v_index < 0); - if (unlikely(__pyx_t_2)) { - - /* "View.MemoryView":914 - * index += view.shape[dim] - * if index < 0: - * raise IndexError, f"Out of bounds on buffer access (axis {dim})" # <<<<<<<<<<<<<< - * - * if index >= shape: - */ - __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 914, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = 0; - __pyx_t_4 = 127; - __Pyx_INCREF(__pyx_kp_u_Out_of_bounds_on_buffer_access_a); - __pyx_t_1 += 37; - __Pyx_GIVEREF(__pyx_kp_u_Out_of_bounds_on_buffer_access_a); - PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_kp_u_Out_of_bounds_on_buffer_access_a); - __pyx_t_5 = __Pyx_PyUnicode_From_Py_ssize_t(__pyx_v_dim, 0, ' ', 'd'); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 914, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_1 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_5); - __Pyx_GIVEREF(__pyx_t_5); - PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_5); - __pyx_t_5 = 0; - __Pyx_INCREF(__pyx_kp_u__7); - __pyx_t_1 += 1; - __Pyx_GIVEREF(__pyx_kp_u__7); - PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_kp_u__7); - __pyx_t_5 = __Pyx_PyUnicode_Join(__pyx_t_3, 3, __pyx_t_1, __pyx_t_4); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 914, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_Raise(__pyx_builtin_IndexError, __pyx_t_5, 0, 0); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __PYX_ERR(1, 914, __pyx_L1_error) - - /* "View.MemoryView":913 - * if index < 0: - * index += view.shape[dim] - * if index < 0: # <<<<<<<<<<<<<< - * raise IndexError, f"Out of bounds on buffer access (axis {dim})" - * - */ - } - - /* "View.MemoryView":911 - * suboffset = view.suboffsets[dim] - * - * if index < 0: # <<<<<<<<<<<<<< - * index += view.shape[dim] - * if index < 0: - */ - } - - /* "View.MemoryView":916 - * raise IndexError, f"Out of bounds on buffer access (axis {dim})" - * - * if index >= shape: # <<<<<<<<<<<<<< - * raise IndexError, f"Out of bounds on buffer access (axis {dim})" - * - */ - __pyx_t_2 = (__pyx_v_index >= __pyx_v_shape); - if (unlikely(__pyx_t_2)) { - - /* "View.MemoryView":917 - * - * if index >= shape: - * raise IndexError, f"Out of bounds on buffer access (axis {dim})" # <<<<<<<<<<<<<< - * - * resultp = bufp + index * stride - */ - __pyx_t_5 = PyTuple_New(3); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 917, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_1 = 0; - __pyx_t_4 = 127; - __Pyx_INCREF(__pyx_kp_u_Out_of_bounds_on_buffer_access_a); - __pyx_t_1 += 37; - __Pyx_GIVEREF(__pyx_kp_u_Out_of_bounds_on_buffer_access_a); - PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_kp_u_Out_of_bounds_on_buffer_access_a); - __pyx_t_3 = __Pyx_PyUnicode_From_Py_ssize_t(__pyx_v_dim, 0, ' ', 'd'); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 917, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_3); - __Pyx_GIVEREF(__pyx_t_3); - PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_3); - __pyx_t_3 = 0; - __Pyx_INCREF(__pyx_kp_u__7); - __pyx_t_1 += 1; - __Pyx_GIVEREF(__pyx_kp_u__7); - PyTuple_SET_ITEM(__pyx_t_5, 2, __pyx_kp_u__7); - __pyx_t_3 = __Pyx_PyUnicode_Join(__pyx_t_5, 3, __pyx_t_1, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 917, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __Pyx_Raise(__pyx_builtin_IndexError, __pyx_t_3, 0, 0); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __PYX_ERR(1, 917, __pyx_L1_error) - - /* "View.MemoryView":916 - * raise IndexError, f"Out of bounds on buffer access (axis {dim})" - * - * if index >= shape: # <<<<<<<<<<<<<< - * raise IndexError, f"Out of bounds on buffer access (axis {dim})" - * - */ - } - - /* "View.MemoryView":919 - * raise IndexError, f"Out of bounds on buffer access (axis {dim})" - * - * resultp = bufp + index * stride # <<<<<<<<<<<<<< - * if suboffset >= 0: - * resultp = ( resultp)[0] + suboffset - */ - __pyx_v_resultp = (__pyx_v_bufp + (__pyx_v_index * __pyx_v_stride)); - - /* "View.MemoryView":920 - * - * resultp = bufp + index * stride - * if suboffset >= 0: # <<<<<<<<<<<<<< - * resultp = ( resultp)[0] + suboffset - * - */ - __pyx_t_2 = (__pyx_v_suboffset >= 0); - if (__pyx_t_2) { - - /* "View.MemoryView":921 - * resultp = bufp + index * stride - * if suboffset >= 0: - * resultp = ( resultp)[0] + suboffset # <<<<<<<<<<<<<< - * - * return resultp - */ - __pyx_v_resultp = ((((char **)__pyx_v_resultp)[0]) + __pyx_v_suboffset); - - /* "View.MemoryView":920 - * - * resultp = bufp + index * stride - * if suboffset >= 0: # <<<<<<<<<<<<<< - * resultp = ( resultp)[0] + suboffset - * - */ - } - - /* "View.MemoryView":923 - * resultp = ( resultp)[0] + suboffset - * - * return resultp # <<<<<<<<<<<<<< - * - * - */ - __pyx_r = __pyx_v_resultp; - goto __pyx_L0; - - /* "View.MemoryView":896 - * - * @cname('__pyx_pybuffer_index') - * cdef char *pybuffer_index(Py_buffer *view, char *bufp, Py_ssize_t index, # <<<<<<<<<<<<<< - * Py_ssize_t dim) except NULL: - * cdef Py_ssize_t shape, stride, suboffset = -1 - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_5); - __Pyx_AddTraceback("View.MemoryView.pybuffer_index", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":929 - * - * @cname('__pyx_memslice_transpose') - * cdef int transpose_memslice(__Pyx_memviewslice *memslice) except -1 nogil: # <<<<<<<<<<<<<< - * cdef int ndim = memslice.memview.view.ndim - * - */ - -static int __pyx_memslice_transpose(__Pyx_memviewslice *__pyx_v_memslice) { - int __pyx_v_ndim; - Py_ssize_t *__pyx_v_shape; - Py_ssize_t *__pyx_v_strides; - int __pyx_v_i; - int __pyx_v_j; - int __pyx_r; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - Py_ssize_t *__pyx_t_2; - long __pyx_t_3; - long __pyx_t_4; - Py_ssize_t __pyx_t_5; - Py_ssize_t __pyx_t_6; - int __pyx_t_7; - int __pyx_t_8; - int __pyx_t_9; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - #ifdef WITH_THREAD - PyGILState_STATE __pyx_gilstate_save; - #endif - __Pyx_RefNannySetupContext("transpose_memslice", 1); - - /* "View.MemoryView":930 - * @cname('__pyx_memslice_transpose') - * cdef int transpose_memslice(__Pyx_memviewslice *memslice) except -1 nogil: - * cdef int ndim = memslice.memview.view.ndim # <<<<<<<<<<<<<< - * - * cdef Py_ssize_t *shape = memslice.shape - */ - __pyx_t_1 = __pyx_v_memslice->memview->view.ndim; - __pyx_v_ndim = __pyx_t_1; - - /* "View.MemoryView":932 - * cdef int ndim = memslice.memview.view.ndim - * - * cdef Py_ssize_t *shape = memslice.shape # <<<<<<<<<<<<<< - * cdef Py_ssize_t *strides = memslice.strides - * - */ - __pyx_t_2 = __pyx_v_memslice->shape; - __pyx_v_shape = __pyx_t_2; - - /* "View.MemoryView":933 - * - * cdef Py_ssize_t *shape = memslice.shape - * cdef Py_ssize_t *strides = memslice.strides # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_2 = __pyx_v_memslice->strides; - __pyx_v_strides = __pyx_t_2; - - /* "View.MemoryView":937 - * - * cdef int i, j - * for i in range(ndim // 2): # <<<<<<<<<<<<<< - * j = ndim - 1 - i - * strides[i], strides[j] = strides[j], strides[i] - */ - __pyx_t_3 = __Pyx_div_long(__pyx_v_ndim, 2); - __pyx_t_4 = __pyx_t_3; - for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_4; __pyx_t_1+=1) { - __pyx_v_i = __pyx_t_1; - - /* "View.MemoryView":938 - * cdef int i, j - * for i in range(ndim // 2): - * j = ndim - 1 - i # <<<<<<<<<<<<<< - * strides[i], strides[j] = strides[j], strides[i] - * shape[i], shape[j] = shape[j], shape[i] - */ - __pyx_v_j = ((__pyx_v_ndim - 1) - __pyx_v_i); - - /* "View.MemoryView":939 - * for i in range(ndim // 2): - * j = ndim - 1 - i - * strides[i], strides[j] = strides[j], strides[i] # <<<<<<<<<<<<<< - * shape[i], shape[j] = shape[j], shape[i] - * - */ - __pyx_t_5 = (__pyx_v_strides[__pyx_v_j]); - __pyx_t_6 = (__pyx_v_strides[__pyx_v_i]); - (__pyx_v_strides[__pyx_v_i]) = __pyx_t_5; - (__pyx_v_strides[__pyx_v_j]) = __pyx_t_6; - - /* "View.MemoryView":940 - * j = ndim - 1 - i - * strides[i], strides[j] = strides[j], strides[i] - * shape[i], shape[j] = shape[j], shape[i] # <<<<<<<<<<<<<< - * - * if memslice.suboffsets[i] >= 0 or memslice.suboffsets[j] >= 0: - */ - __pyx_t_6 = (__pyx_v_shape[__pyx_v_j]); - __pyx_t_5 = (__pyx_v_shape[__pyx_v_i]); - (__pyx_v_shape[__pyx_v_i]) = __pyx_t_6; - (__pyx_v_shape[__pyx_v_j]) = __pyx_t_5; - - /* "View.MemoryView":942 - * shape[i], shape[j] = shape[j], shape[i] - * - * if memslice.suboffsets[i] >= 0 or memslice.suboffsets[j] >= 0: # <<<<<<<<<<<<<< - * _err(PyExc_ValueError, "Cannot transpose memoryview with indirect dimensions") - * - */ - __pyx_t_8 = ((__pyx_v_memslice->suboffsets[__pyx_v_i]) >= 0); - if (!__pyx_t_8) { - } else { - __pyx_t_7 = __pyx_t_8; - goto __pyx_L6_bool_binop_done; - } - __pyx_t_8 = ((__pyx_v_memslice->suboffsets[__pyx_v_j]) >= 0); - __pyx_t_7 = __pyx_t_8; - __pyx_L6_bool_binop_done:; - if (__pyx_t_7) { - - /* "View.MemoryView":943 - * - * if memslice.suboffsets[i] >= 0 or memslice.suboffsets[j] >= 0: - * _err(PyExc_ValueError, "Cannot transpose memoryview with indirect dimensions") # <<<<<<<<<<<<<< - * - * return 0 - */ - __pyx_t_9 = __pyx_memoryview_err(PyExc_ValueError, __pyx_kp_s_Cannot_transpose_memoryview_with); if (unlikely(__pyx_t_9 == ((int)-1))) __PYX_ERR(1, 943, __pyx_L1_error) - - /* "View.MemoryView":942 - * shape[i], shape[j] = shape[j], shape[i] - * - * if memslice.suboffsets[i] >= 0 or memslice.suboffsets[j] >= 0: # <<<<<<<<<<<<<< - * _err(PyExc_ValueError, "Cannot transpose memoryview with indirect dimensions") - * - */ - } - } - - /* "View.MemoryView":945 - * _err(PyExc_ValueError, "Cannot transpose memoryview with indirect dimensions") - * - * return 0 # <<<<<<<<<<<<<< - * - * - */ - __pyx_r = 0; - goto __pyx_L0; - - /* "View.MemoryView":929 - * - * @cname('__pyx_memslice_transpose') - * cdef int transpose_memslice(__Pyx_memviewslice *memslice) except -1 nogil: # <<<<<<<<<<<<<< - * cdef int ndim = memslice.memview.view.ndim - * - */ - - /* function exit code */ - __pyx_L1_error:; - #ifdef WITH_THREAD - __pyx_gilstate_save = __Pyx_PyGILState_Ensure(); - #endif - __Pyx_AddTraceback("View.MemoryView.transpose_memslice", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - #ifdef WITH_THREAD - __Pyx_PyGILState_Release(__pyx_gilstate_save); - #endif - __pyx_L0:; - __Pyx_RefNannyFinishContextNogil() - return __pyx_r; -} - -/* "View.MemoryView":963 - * cdef int (*to_dtype_func)(char *, object) except 0 - * - * def __dealloc__(self): # <<<<<<<<<<<<<< - * __PYX_XCLEAR_MEMVIEW(&self.from_slice, 1) - * - */ - -/* Python wrapper */ -static void __pyx_memoryviewslice___dealloc__(PyObject *__pyx_v_self); /*proto*/ -static void __pyx_memoryviewslice___dealloc__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__dealloc__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_memoryviewslice___pyx_pf_15View_dot_MemoryView_16_memoryviewslice___dealloc__(((struct __pyx_memoryviewslice_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); -} - -static void __pyx_memoryviewslice___pyx_pf_15View_dot_MemoryView_16_memoryviewslice___dealloc__(struct __pyx_memoryviewslice_obj *__pyx_v_self) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__dealloc__", 0); - - /* "View.MemoryView":964 - * - * def __dealloc__(self): - * __PYX_XCLEAR_MEMVIEW(&self.from_slice, 1) # <<<<<<<<<<<<<< - * - * cdef convert_item_to_object(self, char *itemp): - */ - __PYX_XCLEAR_MEMVIEW((&__pyx_v_self->from_slice), 1); - - /* "View.MemoryView":963 - * cdef int (*to_dtype_func)(char *, object) except 0 - * - * def __dealloc__(self): # <<<<<<<<<<<<<< - * __PYX_XCLEAR_MEMVIEW(&self.from_slice, 1) - * - */ - - /* function exit code */ - __Pyx_RefNannyFinishContext(); -} - -/* "View.MemoryView":966 - * __PYX_XCLEAR_MEMVIEW(&self.from_slice, 1) - * - * cdef convert_item_to_object(self, char *itemp): # <<<<<<<<<<<<<< - * if self.to_object_func != NULL: - * return self.to_object_func(itemp) - */ - -static PyObject *__pyx_memoryviewslice_convert_item_to_object(struct __pyx_memoryviewslice_obj *__pyx_v_self, char *__pyx_v_itemp) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("convert_item_to_object", 0); - - /* "View.MemoryView":967 - * - * cdef convert_item_to_object(self, char *itemp): - * if self.to_object_func != NULL: # <<<<<<<<<<<<<< - * return self.to_object_func(itemp) - * else: - */ - __pyx_t_1 = (__pyx_v_self->to_object_func != NULL); - if (__pyx_t_1) { - - /* "View.MemoryView":968 - * cdef convert_item_to_object(self, char *itemp): - * if self.to_object_func != NULL: - * return self.to_object_func(itemp) # <<<<<<<<<<<<<< - * else: - * return memoryview.convert_item_to_object(self, itemp) - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = __pyx_v_self->to_object_func(__pyx_v_itemp); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 968, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "View.MemoryView":967 - * - * cdef convert_item_to_object(self, char *itemp): - * if self.to_object_func != NULL: # <<<<<<<<<<<<<< - * return self.to_object_func(itemp) - * else: - */ - } - - /* "View.MemoryView":970 - * return self.to_object_func(itemp) - * else: - * return memoryview.convert_item_to_object(self, itemp) # <<<<<<<<<<<<<< - * - * cdef assign_item_from_object(self, char *itemp, object value): - */ - /*else*/ { - __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = __pyx_memoryview_convert_item_to_object(((struct __pyx_memoryview_obj *)__pyx_v_self), __pyx_v_itemp); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 970, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - } - - /* "View.MemoryView":966 - * __PYX_XCLEAR_MEMVIEW(&self.from_slice, 1) - * - * cdef convert_item_to_object(self, char *itemp): # <<<<<<<<<<<<<< - * if self.to_object_func != NULL: - * return self.to_object_func(itemp) - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("View.MemoryView._memoryviewslice.convert_item_to_object", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":972 - * return memoryview.convert_item_to_object(self, itemp) - * - * cdef assign_item_from_object(self, char *itemp, object value): # <<<<<<<<<<<<<< - * if self.to_dtype_func != NULL: - * self.to_dtype_func(itemp, value) - */ - -static PyObject *__pyx_memoryviewslice_assign_item_from_object(struct __pyx_memoryviewslice_obj *__pyx_v_self, char *__pyx_v_itemp, PyObject *__pyx_v_value) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - int __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("assign_item_from_object", 0); - - /* "View.MemoryView":973 - * - * cdef assign_item_from_object(self, char *itemp, object value): - * if self.to_dtype_func != NULL: # <<<<<<<<<<<<<< - * self.to_dtype_func(itemp, value) - * else: - */ - __pyx_t_1 = (__pyx_v_self->to_dtype_func != NULL); - if (__pyx_t_1) { - - /* "View.MemoryView":974 - * cdef assign_item_from_object(self, char *itemp, object value): - * if self.to_dtype_func != NULL: - * self.to_dtype_func(itemp, value) # <<<<<<<<<<<<<< - * else: - * memoryview.assign_item_from_object(self, itemp, value) - */ - __pyx_t_2 = __pyx_v_self->to_dtype_func(__pyx_v_itemp, __pyx_v_value); if (unlikely(__pyx_t_2 == ((int)0))) __PYX_ERR(1, 974, __pyx_L1_error) - - /* "View.MemoryView":973 - * - * cdef assign_item_from_object(self, char *itemp, object value): - * if self.to_dtype_func != NULL: # <<<<<<<<<<<<<< - * self.to_dtype_func(itemp, value) - * else: - */ - goto __pyx_L3; - } - - /* "View.MemoryView":976 - * self.to_dtype_func(itemp, value) - * else: - * memoryview.assign_item_from_object(self, itemp, value) # <<<<<<<<<<<<<< - * - * cdef _get_base(self): - */ - /*else*/ { - __pyx_t_3 = __pyx_memoryview_assign_item_from_object(((struct __pyx_memoryview_obj *)__pyx_v_self), __pyx_v_itemp, __pyx_v_value); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 976, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - } - __pyx_L3:; - - /* "View.MemoryView":972 - * return memoryview.convert_item_to_object(self, itemp) - * - * cdef assign_item_from_object(self, char *itemp, object value): # <<<<<<<<<<<<<< - * if self.to_dtype_func != NULL: - * self.to_dtype_func(itemp, value) - */ - - /* function exit code */ - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("View.MemoryView._memoryviewslice.assign_item_from_object", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":978 - * memoryview.assign_item_from_object(self, itemp, value) - * - * cdef _get_base(self): # <<<<<<<<<<<<<< - * return self.from_object - * - */ - -static PyObject *__pyx_memoryviewslice__get_base(struct __pyx_memoryviewslice_obj *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("_get_base", 0); - - /* "View.MemoryView":979 - * - * cdef _get_base(self): - * return self.from_object # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_self->from_object); - __pyx_r = __pyx_v_self->from_object; - goto __pyx_L0; - - /* "View.MemoryView":978 - * memoryview.assign_item_from_object(self, itemp, value) - * - * cdef _get_base(self): # <<<<<<<<<<<<<< - * return self.from_object - * - */ - - /* function exit code */ - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - -/* Python wrapper */ -static PyObject *__pyx_pw___pyx_memoryviewslice_1__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyObject *__pyx_pw___pyx_memoryviewslice_1__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__reduce_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 1, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("__reduce_cython__", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__reduce_cython__", 0))) return NULL; - goto __pyx_L4_argument_unpacking_done; - goto __pyx_L3_error; - __pyx_L3_error:; - __Pyx_AddTraceback("View.MemoryView._memoryviewslice.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf___pyx_memoryviewslice___reduce_cython__(((struct __pyx_memoryviewslice_obj *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf___pyx_memoryviewslice___reduce_cython__(CYTHON_UNUSED struct __pyx_memoryviewslice_obj *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__reduce_cython__", 0); - - /* "(tree fragment)":2 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<< - * def __setstate_cython__(self, __pyx_state): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); - __PYX_ERR(1, 2, __pyx_L1_error) - - /* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("View.MemoryView._memoryviewslice.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - -/* Python wrapper */ -static PyObject *__pyx_pw___pyx_memoryviewslice_3__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyObject *__pyx_pw___pyx_memoryviewslice_3__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - CYTHON_UNUSED PyObject *__pyx_v___pyx_state = 0; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__setstate_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 3, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_pyx_state,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pyx_state)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 3, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__setstate_cython__") < 0)) __PYX_ERR(1, 3, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 1)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - } - __pyx_v___pyx_state = values[0]; - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__setstate_cython__", 1, 1, 1, __pyx_nargs); __PYX_ERR(1, 3, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("View.MemoryView._memoryviewslice.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf___pyx_memoryviewslice_2__setstate_cython__(((struct __pyx_memoryviewslice_obj *)__pyx_v_self), __pyx_v___pyx_state); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf___pyx_memoryviewslice_2__setstate_cython__(CYTHON_UNUSED struct __pyx_memoryviewslice_obj *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__setstate_cython__", 0); - - /* "(tree fragment)":4 - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<< - */ - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); - __PYX_ERR(1, 4, __pyx_L1_error) - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("View.MemoryView._memoryviewslice.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":999 - * - * @cname('__pyx_memoryview_fromslice') - * cdef memoryview_fromslice(__Pyx_memviewslice memviewslice, # <<<<<<<<<<<<<< - * int ndim, - * object (*to_object_func)(char *), - */ - -static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewslice, int __pyx_v_ndim, PyObject *(*__pyx_v_to_object_func)(char *), int (*__pyx_v_to_dtype_func)(char *, PyObject *), int __pyx_v_dtype_is_object) { - struct __pyx_memoryviewslice_obj *__pyx_v_result = 0; - Py_ssize_t __pyx_v_suboffset; - PyObject *__pyx_v_length = NULL; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - __Pyx_TypeInfo *__pyx_t_4; - Py_buffer __pyx_t_5; - Py_ssize_t *__pyx_t_6; - Py_ssize_t *__pyx_t_7; - Py_ssize_t *__pyx_t_8; - Py_ssize_t __pyx_t_9; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("memoryview_fromslice", 0); - - /* "View.MemoryView":1007 - * cdef _memoryviewslice result - * - * if memviewslice.memview == Py_None: # <<<<<<<<<<<<<< - * return None - * - */ - __pyx_t_1 = (((PyObject *)__pyx_v_memviewslice.memview) == Py_None); - if (__pyx_t_1) { - - /* "View.MemoryView":1008 - * - * if memviewslice.memview == Py_None: - * return None # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - - /* "View.MemoryView":1007 - * cdef _memoryviewslice result - * - * if memviewslice.memview == Py_None: # <<<<<<<<<<<<<< - * return None - * - */ - } - - /* "View.MemoryView":1013 - * - * - * result = _memoryviewslice.__new__(_memoryviewslice, None, 0, dtype_is_object) # <<<<<<<<<<<<<< - * - * result.from_slice = memviewslice - */ - __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_v_dtype_is_object); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1013, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 1013, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_INCREF(Py_None); - __Pyx_GIVEREF(Py_None); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 0, Py_None)) __PYX_ERR(1, 1013, __pyx_L1_error); - __Pyx_INCREF(__pyx_int_0); - __Pyx_GIVEREF(__pyx_int_0); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_int_0)) __PYX_ERR(1, 1013, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_2); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_t_2)) __PYX_ERR(1, 1013, __pyx_L1_error); - __pyx_t_2 = 0; - __pyx_t_2 = ((PyObject *)__pyx_tp_new__memoryviewslice(((PyTypeObject *)__pyx_memoryviewslice_type), __pyx_t_3, NULL)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1013, __pyx_L1_error) - __Pyx_GOTREF((PyObject *)__pyx_t_2); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_v_result = ((struct __pyx_memoryviewslice_obj *)__pyx_t_2); - __pyx_t_2 = 0; - - /* "View.MemoryView":1015 - * result = _memoryviewslice.__new__(_memoryviewslice, None, 0, dtype_is_object) - * - * result.from_slice = memviewslice # <<<<<<<<<<<<<< - * __PYX_INC_MEMVIEW(&memviewslice, 1) - * - */ - __pyx_v_result->from_slice = __pyx_v_memviewslice; - - /* "View.MemoryView":1016 - * - * result.from_slice = memviewslice - * __PYX_INC_MEMVIEW(&memviewslice, 1) # <<<<<<<<<<<<<< - * - * result.from_object = ( memviewslice.memview)._get_base() - */ - __PYX_INC_MEMVIEW((&__pyx_v_memviewslice), 1); - - /* "View.MemoryView":1018 - * __PYX_INC_MEMVIEW(&memviewslice, 1) - * - * result.from_object = ( memviewslice.memview)._get_base() # <<<<<<<<<<<<<< - * result.typeinfo = memviewslice.memview.typeinfo - * - */ - __pyx_t_2 = ((struct __pyx_vtabstruct_memoryview *)((struct __pyx_memoryview_obj *)__pyx_v_memviewslice.memview)->__pyx_vtab)->_get_base(((struct __pyx_memoryview_obj *)__pyx_v_memviewslice.memview)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1018, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_GIVEREF(__pyx_t_2); - __Pyx_GOTREF(__pyx_v_result->from_object); - __Pyx_DECREF(__pyx_v_result->from_object); - __pyx_v_result->from_object = __pyx_t_2; - __pyx_t_2 = 0; - - /* "View.MemoryView":1019 - * - * result.from_object = ( memviewslice.memview)._get_base() - * result.typeinfo = memviewslice.memview.typeinfo # <<<<<<<<<<<<<< - * - * result.view = memviewslice.memview.view - */ - __pyx_t_4 = __pyx_v_memviewslice.memview->typeinfo; - __pyx_v_result->__pyx_base.typeinfo = __pyx_t_4; - - /* "View.MemoryView":1021 - * result.typeinfo = memviewslice.memview.typeinfo - * - * result.view = memviewslice.memview.view # <<<<<<<<<<<<<< - * result.view.buf = memviewslice.data - * result.view.ndim = ndim - */ - __pyx_t_5 = __pyx_v_memviewslice.memview->view; - __pyx_v_result->__pyx_base.view = __pyx_t_5; - - /* "View.MemoryView":1022 - * - * result.view = memviewslice.memview.view - * result.view.buf = memviewslice.data # <<<<<<<<<<<<<< - * result.view.ndim = ndim - * (<__pyx_buffer *> &result.view).obj = Py_None - */ - __pyx_v_result->__pyx_base.view.buf = ((void *)__pyx_v_memviewslice.data); - - /* "View.MemoryView":1023 - * result.view = memviewslice.memview.view - * result.view.buf = memviewslice.data - * result.view.ndim = ndim # <<<<<<<<<<<<<< - * (<__pyx_buffer *> &result.view).obj = Py_None - * Py_INCREF(Py_None) - */ - __pyx_v_result->__pyx_base.view.ndim = __pyx_v_ndim; - - /* "View.MemoryView":1024 - * result.view.buf = memviewslice.data - * result.view.ndim = ndim - * (<__pyx_buffer *> &result.view).obj = Py_None # <<<<<<<<<<<<<< - * Py_INCREF(Py_None) - * - */ - ((Py_buffer *)(&__pyx_v_result->__pyx_base.view))->obj = Py_None; - - /* "View.MemoryView":1025 - * result.view.ndim = ndim - * (<__pyx_buffer *> &result.view).obj = Py_None - * Py_INCREF(Py_None) # <<<<<<<<<<<<<< - * - * if (memviewslice.memview).flags & PyBUF_WRITABLE: - */ - Py_INCREF(Py_None); - - /* "View.MemoryView":1027 - * Py_INCREF(Py_None) - * - * if (memviewslice.memview).flags & PyBUF_WRITABLE: # <<<<<<<<<<<<<< - * result.flags = PyBUF_RECORDS - * else: - */ - __pyx_t_1 = ((((struct __pyx_memoryview_obj *)__pyx_v_memviewslice.memview)->flags & PyBUF_WRITABLE) != 0); - if (__pyx_t_1) { - - /* "View.MemoryView":1028 - * - * if (memviewslice.memview).flags & PyBUF_WRITABLE: - * result.flags = PyBUF_RECORDS # <<<<<<<<<<<<<< - * else: - * result.flags = PyBUF_RECORDS_RO - */ - __pyx_v_result->__pyx_base.flags = PyBUF_RECORDS; - - /* "View.MemoryView":1027 - * Py_INCREF(Py_None) - * - * if (memviewslice.memview).flags & PyBUF_WRITABLE: # <<<<<<<<<<<<<< - * result.flags = PyBUF_RECORDS - * else: - */ - goto __pyx_L4; - } - - /* "View.MemoryView":1030 - * result.flags = PyBUF_RECORDS - * else: - * result.flags = PyBUF_RECORDS_RO # <<<<<<<<<<<<<< - * - * result.view.shape = result.from_slice.shape - */ - /*else*/ { - __pyx_v_result->__pyx_base.flags = PyBUF_RECORDS_RO; - } - __pyx_L4:; - - /* "View.MemoryView":1032 - * result.flags = PyBUF_RECORDS_RO - * - * result.view.shape = result.from_slice.shape # <<<<<<<<<<<<<< - * result.view.strides = result.from_slice.strides - * - */ - __pyx_v_result->__pyx_base.view.shape = ((Py_ssize_t *)__pyx_v_result->from_slice.shape); - - /* "View.MemoryView":1033 - * - * result.view.shape = result.from_slice.shape - * result.view.strides = result.from_slice.strides # <<<<<<<<<<<<<< - * - * - */ - __pyx_v_result->__pyx_base.view.strides = ((Py_ssize_t *)__pyx_v_result->from_slice.strides); - - /* "View.MemoryView":1036 - * - * - * result.view.suboffsets = NULL # <<<<<<<<<<<<<< - * for suboffset in result.from_slice.suboffsets[:ndim]: - * if suboffset >= 0: - */ - __pyx_v_result->__pyx_base.view.suboffsets = NULL; - - /* "View.MemoryView":1037 - * - * result.view.suboffsets = NULL - * for suboffset in result.from_slice.suboffsets[:ndim]: # <<<<<<<<<<<<<< - * if suboffset >= 0: - * result.view.suboffsets = result.from_slice.suboffsets - */ - __pyx_t_7 = (__pyx_v_result->from_slice.suboffsets + __pyx_v_ndim); - for (__pyx_t_8 = __pyx_v_result->from_slice.suboffsets; __pyx_t_8 < __pyx_t_7; __pyx_t_8++) { - __pyx_t_6 = __pyx_t_8; - __pyx_v_suboffset = (__pyx_t_6[0]); - - /* "View.MemoryView":1038 - * result.view.suboffsets = NULL - * for suboffset in result.from_slice.suboffsets[:ndim]: - * if suboffset >= 0: # <<<<<<<<<<<<<< - * result.view.suboffsets = result.from_slice.suboffsets - * break - */ - __pyx_t_1 = (__pyx_v_suboffset >= 0); - if (__pyx_t_1) { - - /* "View.MemoryView":1039 - * for suboffset in result.from_slice.suboffsets[:ndim]: - * if suboffset >= 0: - * result.view.suboffsets = result.from_slice.suboffsets # <<<<<<<<<<<<<< - * break - * - */ - __pyx_v_result->__pyx_base.view.suboffsets = ((Py_ssize_t *)__pyx_v_result->from_slice.suboffsets); - - /* "View.MemoryView":1040 - * if suboffset >= 0: - * result.view.suboffsets = result.from_slice.suboffsets - * break # <<<<<<<<<<<<<< - * - * result.view.len = result.view.itemsize - */ - goto __pyx_L6_break; - - /* "View.MemoryView":1038 - * result.view.suboffsets = NULL - * for suboffset in result.from_slice.suboffsets[:ndim]: - * if suboffset >= 0: # <<<<<<<<<<<<<< - * result.view.suboffsets = result.from_slice.suboffsets - * break - */ - } - } - __pyx_L6_break:; - - /* "View.MemoryView":1042 - * break - * - * result.view.len = result.view.itemsize # <<<<<<<<<<<<<< - * for length in result.view.shape[:ndim]: - * result.view.len *= length - */ - __pyx_t_9 = __pyx_v_result->__pyx_base.view.itemsize; - __pyx_v_result->__pyx_base.view.len = __pyx_t_9; - - /* "View.MemoryView":1043 - * - * result.view.len = result.view.itemsize - * for length in result.view.shape[:ndim]: # <<<<<<<<<<<<<< - * result.view.len *= length - * - */ - __pyx_t_7 = (__pyx_v_result->__pyx_base.view.shape + __pyx_v_ndim); - for (__pyx_t_8 = __pyx_v_result->__pyx_base.view.shape; __pyx_t_8 < __pyx_t_7; __pyx_t_8++) { - __pyx_t_6 = __pyx_t_8; - __pyx_t_2 = PyInt_FromSsize_t((__pyx_t_6[0])); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1043, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_XDECREF_SET(__pyx_v_length, __pyx_t_2); - __pyx_t_2 = 0; - - /* "View.MemoryView":1044 - * result.view.len = result.view.itemsize - * for length in result.view.shape[:ndim]: - * result.view.len *= length # <<<<<<<<<<<<<< - * - * result.to_object_func = to_object_func - */ - __pyx_t_2 = PyInt_FromSsize_t(__pyx_v_result->__pyx_base.view.len); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1044, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyNumber_InPlaceMultiply(__pyx_t_2, __pyx_v_length); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 1044, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_9 = __Pyx_PyIndex_AsSsize_t(__pyx_t_3); if (unlikely((__pyx_t_9 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 1044, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_v_result->__pyx_base.view.len = __pyx_t_9; - } - - /* "View.MemoryView":1046 - * result.view.len *= length - * - * result.to_object_func = to_object_func # <<<<<<<<<<<<<< - * result.to_dtype_func = to_dtype_func - * - */ - __pyx_v_result->to_object_func = __pyx_v_to_object_func; - - /* "View.MemoryView":1047 - * - * result.to_object_func = to_object_func - * result.to_dtype_func = to_dtype_func # <<<<<<<<<<<<<< - * - * return result - */ - __pyx_v_result->to_dtype_func = __pyx_v_to_dtype_func; - - /* "View.MemoryView":1049 - * result.to_dtype_func = to_dtype_func - * - * return result # <<<<<<<<<<<<<< - * - * @cname('__pyx_memoryview_get_slice_from_memoryview') - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF((PyObject *)__pyx_v_result); - __pyx_r = ((PyObject *)__pyx_v_result); - goto __pyx_L0; - - /* "View.MemoryView":999 - * - * @cname('__pyx_memoryview_fromslice') - * cdef memoryview_fromslice(__Pyx_memviewslice memviewslice, # <<<<<<<<<<<<<< - * int ndim, - * object (*to_object_func)(char *), - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("View.MemoryView.memoryview_fromslice", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF((PyObject *)__pyx_v_result); - __Pyx_XDECREF(__pyx_v_length); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":1052 - * - * @cname('__pyx_memoryview_get_slice_from_memoryview') - * cdef __Pyx_memviewslice *get_slice_from_memview(memoryview memview, # <<<<<<<<<<<<<< - * __Pyx_memviewslice *mslice) except NULL: - * cdef _memoryviewslice obj - */ - -static __Pyx_memviewslice *__pyx_memoryview_get_slice_from_memoryview(struct __pyx_memoryview_obj *__pyx_v_memview, __Pyx_memviewslice *__pyx_v_mslice) { - struct __pyx_memoryviewslice_obj *__pyx_v_obj = 0; - __Pyx_memviewslice *__pyx_r; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("get_slice_from_memview", 0); - - /* "View.MemoryView":1055 - * __Pyx_memviewslice *mslice) except NULL: - * cdef _memoryviewslice obj - * if isinstance(memview, _memoryviewslice): # <<<<<<<<<<<<<< - * obj = memview - * return &obj.from_slice - */ - __pyx_t_1 = __Pyx_TypeCheck(((PyObject *)__pyx_v_memview), __pyx_memoryviewslice_type); - if (__pyx_t_1) { - - /* "View.MemoryView":1056 - * cdef _memoryviewslice obj - * if isinstance(memview, _memoryviewslice): - * obj = memview # <<<<<<<<<<<<<< - * return &obj.from_slice - * else: - */ - if (!(likely(((((PyObject *)__pyx_v_memview)) == Py_None) || likely(__Pyx_TypeTest(((PyObject *)__pyx_v_memview), __pyx_memoryviewslice_type))))) __PYX_ERR(1, 1056, __pyx_L1_error) - __pyx_t_2 = ((PyObject *)__pyx_v_memview); - __Pyx_INCREF(__pyx_t_2); - __pyx_v_obj = ((struct __pyx_memoryviewslice_obj *)__pyx_t_2); - __pyx_t_2 = 0; - - /* "View.MemoryView":1057 - * if isinstance(memview, _memoryviewslice): - * obj = memview - * return &obj.from_slice # <<<<<<<<<<<<<< - * else: - * slice_copy(memview, mslice) - */ - __pyx_r = (&__pyx_v_obj->from_slice); - goto __pyx_L0; - - /* "View.MemoryView":1055 - * __Pyx_memviewslice *mslice) except NULL: - * cdef _memoryviewslice obj - * if isinstance(memview, _memoryviewslice): # <<<<<<<<<<<<<< - * obj = memview - * return &obj.from_slice - */ - } - - /* "View.MemoryView":1059 - * return &obj.from_slice - * else: - * slice_copy(memview, mslice) # <<<<<<<<<<<<<< - * return mslice - * - */ - /*else*/ { - __pyx_memoryview_slice_copy(__pyx_v_memview, __pyx_v_mslice); - - /* "View.MemoryView":1060 - * else: - * slice_copy(memview, mslice) - * return mslice # <<<<<<<<<<<<<< - * - * @cname('__pyx_memoryview_slice_copy') - */ - __pyx_r = __pyx_v_mslice; - goto __pyx_L0; - } - - /* "View.MemoryView":1052 - * - * @cname('__pyx_memoryview_get_slice_from_memoryview') - * cdef __Pyx_memviewslice *get_slice_from_memview(memoryview memview, # <<<<<<<<<<<<<< - * __Pyx_memviewslice *mslice) except NULL: - * cdef _memoryviewslice obj - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("View.MemoryView.get_slice_from_memview", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XDECREF((PyObject *)__pyx_v_obj); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":1063 - * - * @cname('__pyx_memoryview_slice_copy') - * cdef void slice_copy(memoryview memview, __Pyx_memviewslice *dst) noexcept: # <<<<<<<<<<<<<< - * cdef int dim - * cdef (Py_ssize_t*) shape, strides, suboffsets - */ - -static void __pyx_memoryview_slice_copy(struct __pyx_memoryview_obj *__pyx_v_memview, __Pyx_memviewslice *__pyx_v_dst) { - int __pyx_v_dim; - Py_ssize_t *__pyx_v_shape; - Py_ssize_t *__pyx_v_strides; - Py_ssize_t *__pyx_v_suboffsets; - __Pyx_RefNannyDeclarations - Py_ssize_t *__pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - Py_ssize_t __pyx_t_5; - int __pyx_t_6; - __Pyx_RefNannySetupContext("slice_copy", 0); - - /* "View.MemoryView":1067 - * cdef (Py_ssize_t*) shape, strides, suboffsets - * - * shape = memview.view.shape # <<<<<<<<<<<<<< - * strides = memview.view.strides - * suboffsets = memview.view.suboffsets - */ - __pyx_t_1 = __pyx_v_memview->view.shape; - __pyx_v_shape = __pyx_t_1; - - /* "View.MemoryView":1068 - * - * shape = memview.view.shape - * strides = memview.view.strides # <<<<<<<<<<<<<< - * suboffsets = memview.view.suboffsets - * - */ - __pyx_t_1 = __pyx_v_memview->view.strides; - __pyx_v_strides = __pyx_t_1; - - /* "View.MemoryView":1069 - * shape = memview.view.shape - * strides = memview.view.strides - * suboffsets = memview.view.suboffsets # <<<<<<<<<<<<<< - * - * dst.memview = <__pyx_memoryview *> memview - */ - __pyx_t_1 = __pyx_v_memview->view.suboffsets; - __pyx_v_suboffsets = __pyx_t_1; - - /* "View.MemoryView":1071 - * suboffsets = memview.view.suboffsets - * - * dst.memview = <__pyx_memoryview *> memview # <<<<<<<<<<<<<< - * dst.data = memview.view.buf - * - */ - __pyx_v_dst->memview = ((struct __pyx_memoryview_obj *)__pyx_v_memview); - - /* "View.MemoryView":1072 - * - * dst.memview = <__pyx_memoryview *> memview - * dst.data = memview.view.buf # <<<<<<<<<<<<<< - * - * for dim in range(memview.view.ndim): - */ - __pyx_v_dst->data = ((char *)__pyx_v_memview->view.buf); - - /* "View.MemoryView":1074 - * dst.data = memview.view.buf - * - * for dim in range(memview.view.ndim): # <<<<<<<<<<<<<< - * dst.shape[dim] = shape[dim] - * dst.strides[dim] = strides[dim] - */ - __pyx_t_2 = __pyx_v_memview->view.ndim; - __pyx_t_3 = __pyx_t_2; - for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { - __pyx_v_dim = __pyx_t_4; - - /* "View.MemoryView":1075 - * - * for dim in range(memview.view.ndim): - * dst.shape[dim] = shape[dim] # <<<<<<<<<<<<<< - * dst.strides[dim] = strides[dim] - * dst.suboffsets[dim] = suboffsets[dim] if suboffsets else -1 - */ - (__pyx_v_dst->shape[__pyx_v_dim]) = (__pyx_v_shape[__pyx_v_dim]); - - /* "View.MemoryView":1076 - * for dim in range(memview.view.ndim): - * dst.shape[dim] = shape[dim] - * dst.strides[dim] = strides[dim] # <<<<<<<<<<<<<< - * dst.suboffsets[dim] = suboffsets[dim] if suboffsets else -1 - * - */ - (__pyx_v_dst->strides[__pyx_v_dim]) = (__pyx_v_strides[__pyx_v_dim]); - - /* "View.MemoryView":1077 - * dst.shape[dim] = shape[dim] - * dst.strides[dim] = strides[dim] - * dst.suboffsets[dim] = suboffsets[dim] if suboffsets else -1 # <<<<<<<<<<<<<< - * - * @cname('__pyx_memoryview_copy_object') - */ - __pyx_t_6 = (__pyx_v_suboffsets != 0); - if (__pyx_t_6) { - __pyx_t_5 = (__pyx_v_suboffsets[__pyx_v_dim]); - } else { - __pyx_t_5 = -1L; - } - (__pyx_v_dst->suboffsets[__pyx_v_dim]) = __pyx_t_5; - } - - /* "View.MemoryView":1063 - * - * @cname('__pyx_memoryview_slice_copy') - * cdef void slice_copy(memoryview memview, __Pyx_memviewslice *dst) noexcept: # <<<<<<<<<<<<<< - * cdef int dim - * cdef (Py_ssize_t*) shape, strides, suboffsets - */ - - /* function exit code */ - __Pyx_RefNannyFinishContext(); -} - -/* "View.MemoryView":1080 - * - * @cname('__pyx_memoryview_copy_object') - * cdef memoryview_copy(memoryview memview): # <<<<<<<<<<<<<< - * "Create a new memoryview object" - * cdef __Pyx_memviewslice memviewslice - */ - -static PyObject *__pyx_memoryview_copy_object(struct __pyx_memoryview_obj *__pyx_v_memview) { - __Pyx_memviewslice __pyx_v_memviewslice; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("memoryview_copy", 0); - - /* "View.MemoryView":1083 - * "Create a new memoryview object" - * cdef __Pyx_memviewslice memviewslice - * slice_copy(memview, &memviewslice) # <<<<<<<<<<<<<< - * return memoryview_copy_from_slice(memview, &memviewslice) - * - */ - __pyx_memoryview_slice_copy(__pyx_v_memview, (&__pyx_v_memviewslice)); - - /* "View.MemoryView":1084 - * cdef __Pyx_memviewslice memviewslice - * slice_copy(memview, &memviewslice) - * return memoryview_copy_from_slice(memview, &memviewslice) # <<<<<<<<<<<<<< - * - * @cname('__pyx_memoryview_copy_object_from_slice') - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __pyx_memoryview_copy_object_from_slice(__pyx_v_memview, (&__pyx_v_memviewslice)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 1084, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "View.MemoryView":1080 - * - * @cname('__pyx_memoryview_copy_object') - * cdef memoryview_copy(memoryview memview): # <<<<<<<<<<<<<< - * "Create a new memoryview object" - * cdef __Pyx_memviewslice memviewslice - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("View.MemoryView.memoryview_copy", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":1087 - * - * @cname('__pyx_memoryview_copy_object_from_slice') - * cdef memoryview_copy_from_slice(memoryview memview, __Pyx_memviewslice *memviewslice): # <<<<<<<<<<<<<< - * """ - * Create a new memoryview object from a given memoryview object and slice. - */ - -static PyObject *__pyx_memoryview_copy_object_from_slice(struct __pyx_memoryview_obj *__pyx_v_memview, __Pyx_memviewslice *__pyx_v_memviewslice) { - PyObject *(*__pyx_v_to_object_func)(char *); - int (*__pyx_v_to_dtype_func)(char *, PyObject *); - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - PyObject *(*__pyx_t_2)(char *); - int (*__pyx_t_3)(char *, PyObject *); - PyObject *__pyx_t_4 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("memoryview_copy_from_slice", 0); - - /* "View.MemoryView":1094 - * cdef int (*to_dtype_func)(char *, object) except 0 - * - * if isinstance(memview, _memoryviewslice): # <<<<<<<<<<<<<< - * to_object_func = (<_memoryviewslice> memview).to_object_func - * to_dtype_func = (<_memoryviewslice> memview).to_dtype_func - */ - __pyx_t_1 = __Pyx_TypeCheck(((PyObject *)__pyx_v_memview), __pyx_memoryviewslice_type); - if (__pyx_t_1) { - - /* "View.MemoryView":1095 - * - * if isinstance(memview, _memoryviewslice): - * to_object_func = (<_memoryviewslice> memview).to_object_func # <<<<<<<<<<<<<< - * to_dtype_func = (<_memoryviewslice> memview).to_dtype_func - * else: - */ - __pyx_t_2 = ((struct __pyx_memoryviewslice_obj *)__pyx_v_memview)->to_object_func; - __pyx_v_to_object_func = __pyx_t_2; - - /* "View.MemoryView":1096 - * if isinstance(memview, _memoryviewslice): - * to_object_func = (<_memoryviewslice> memview).to_object_func - * to_dtype_func = (<_memoryviewslice> memview).to_dtype_func # <<<<<<<<<<<<<< - * else: - * to_object_func = NULL - */ - __pyx_t_3 = ((struct __pyx_memoryviewslice_obj *)__pyx_v_memview)->to_dtype_func; - __pyx_v_to_dtype_func = __pyx_t_3; - - /* "View.MemoryView":1094 - * cdef int (*to_dtype_func)(char *, object) except 0 - * - * if isinstance(memview, _memoryviewslice): # <<<<<<<<<<<<<< - * to_object_func = (<_memoryviewslice> memview).to_object_func - * to_dtype_func = (<_memoryviewslice> memview).to_dtype_func - */ - goto __pyx_L3; - } - - /* "View.MemoryView":1098 - * to_dtype_func = (<_memoryviewslice> memview).to_dtype_func - * else: - * to_object_func = NULL # <<<<<<<<<<<<<< - * to_dtype_func = NULL - * - */ - /*else*/ { - __pyx_v_to_object_func = NULL; - - /* "View.MemoryView":1099 - * else: - * to_object_func = NULL - * to_dtype_func = NULL # <<<<<<<<<<<<<< - * - * return memoryview_fromslice(memviewslice[0], memview.view.ndim, - */ - __pyx_v_to_dtype_func = NULL; - } - __pyx_L3:; - - /* "View.MemoryView":1101 - * to_dtype_func = NULL - * - * return memoryview_fromslice(memviewslice[0], memview.view.ndim, # <<<<<<<<<<<<<< - * to_object_func, to_dtype_func, - * memview.dtype_is_object) - */ - __Pyx_XDECREF(__pyx_r); - - /* "View.MemoryView":1103 - * return memoryview_fromslice(memviewslice[0], memview.view.ndim, - * to_object_func, to_dtype_func, - * memview.dtype_is_object) # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_4 = __pyx_memoryview_fromslice((__pyx_v_memviewslice[0]), __pyx_v_memview->view.ndim, __pyx_v_to_object_func, __pyx_v_to_dtype_func, __pyx_v_memview->dtype_is_object); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 1101, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_r = __pyx_t_4; - __pyx_t_4 = 0; - goto __pyx_L0; - - /* "View.MemoryView":1087 - * - * @cname('__pyx_memoryview_copy_object_from_slice') - * cdef memoryview_copy_from_slice(memoryview memview, __Pyx_memviewslice *memviewslice): # <<<<<<<<<<<<<< - * """ - * Create a new memoryview object from a given memoryview object and slice. - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_4); - __Pyx_AddTraceback("View.MemoryView.memoryview_copy_from_slice", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "View.MemoryView":1109 - * - * - * cdef Py_ssize_t abs_py_ssize_t(Py_ssize_t arg) noexcept nogil: # <<<<<<<<<<<<<< - * return -arg if arg < 0 else arg - * - */ - -static Py_ssize_t abs_py_ssize_t(Py_ssize_t __pyx_v_arg) { - Py_ssize_t __pyx_r; - Py_ssize_t __pyx_t_1; - int __pyx_t_2; - - /* "View.MemoryView":1110 - * - * cdef Py_ssize_t abs_py_ssize_t(Py_ssize_t arg) noexcept nogil: - * return -arg if arg < 0 else arg # <<<<<<<<<<<<<< - * - * @cname('__pyx_get_best_slice_order') - */ - __pyx_t_2 = (__pyx_v_arg < 0); - if (__pyx_t_2) { - __pyx_t_1 = (-__pyx_v_arg); - } else { - __pyx_t_1 = __pyx_v_arg; - } - __pyx_r = __pyx_t_1; - goto __pyx_L0; - - /* "View.MemoryView":1109 - * - * - * cdef Py_ssize_t abs_py_ssize_t(Py_ssize_t arg) noexcept nogil: # <<<<<<<<<<<<<< - * return -arg if arg < 0 else arg - * - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "View.MemoryView":1113 - * - * @cname('__pyx_get_best_slice_order') - * cdef char get_best_order(__Pyx_memviewslice *mslice, int ndim) noexcept nogil: # <<<<<<<<<<<<<< - * """ - * Figure out the best memory access order for a given slice. - */ - -static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int __pyx_v_ndim) { - int __pyx_v_i; - Py_ssize_t __pyx_v_c_stride; - Py_ssize_t __pyx_v_f_stride; - char __pyx_r; - int __pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - - /* "View.MemoryView":1118 - * """ - * cdef int i - * cdef Py_ssize_t c_stride = 0 # <<<<<<<<<<<<<< - * cdef Py_ssize_t f_stride = 0 - * - */ - __pyx_v_c_stride = 0; - - /* "View.MemoryView":1119 - * cdef int i - * cdef Py_ssize_t c_stride = 0 - * cdef Py_ssize_t f_stride = 0 # <<<<<<<<<<<<<< - * - * for i in range(ndim - 1, -1, -1): - */ - __pyx_v_f_stride = 0; - - /* "View.MemoryView":1121 - * cdef Py_ssize_t f_stride = 0 - * - * for i in range(ndim - 1, -1, -1): # <<<<<<<<<<<<<< - * if mslice.shape[i] > 1: - * c_stride = mslice.strides[i] - */ - for (__pyx_t_1 = (__pyx_v_ndim - 1); __pyx_t_1 > -1; __pyx_t_1-=1) { - __pyx_v_i = __pyx_t_1; - - /* "View.MemoryView":1122 - * - * for i in range(ndim - 1, -1, -1): - * if mslice.shape[i] > 1: # <<<<<<<<<<<<<< - * c_stride = mslice.strides[i] - * break - */ - __pyx_t_2 = ((__pyx_v_mslice->shape[__pyx_v_i]) > 1); - if (__pyx_t_2) { - - /* "View.MemoryView":1123 - * for i in range(ndim - 1, -1, -1): - * if mslice.shape[i] > 1: - * c_stride = mslice.strides[i] # <<<<<<<<<<<<<< - * break - * - */ - __pyx_v_c_stride = (__pyx_v_mslice->strides[__pyx_v_i]); - - /* "View.MemoryView":1124 - * if mslice.shape[i] > 1: - * c_stride = mslice.strides[i] - * break # <<<<<<<<<<<<<< - * - * for i in range(ndim): - */ - goto __pyx_L4_break; - - /* "View.MemoryView":1122 - * - * for i in range(ndim - 1, -1, -1): - * if mslice.shape[i] > 1: # <<<<<<<<<<<<<< - * c_stride = mslice.strides[i] - * break - */ - } - } - __pyx_L4_break:; - - /* "View.MemoryView":1126 - * break - * - * for i in range(ndim): # <<<<<<<<<<<<<< - * if mslice.shape[i] > 1: - * f_stride = mslice.strides[i] - */ - __pyx_t_1 = __pyx_v_ndim; - __pyx_t_3 = __pyx_t_1; - for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { - __pyx_v_i = __pyx_t_4; - - /* "View.MemoryView":1127 - * - * for i in range(ndim): - * if mslice.shape[i] > 1: # <<<<<<<<<<<<<< - * f_stride = mslice.strides[i] - * break - */ - __pyx_t_2 = ((__pyx_v_mslice->shape[__pyx_v_i]) > 1); - if (__pyx_t_2) { - - /* "View.MemoryView":1128 - * for i in range(ndim): - * if mslice.shape[i] > 1: - * f_stride = mslice.strides[i] # <<<<<<<<<<<<<< - * break - * - */ - __pyx_v_f_stride = (__pyx_v_mslice->strides[__pyx_v_i]); - - /* "View.MemoryView":1129 - * if mslice.shape[i] > 1: - * f_stride = mslice.strides[i] - * break # <<<<<<<<<<<<<< - * - * if abs_py_ssize_t(c_stride) <= abs_py_ssize_t(f_stride): - */ - goto __pyx_L7_break; - - /* "View.MemoryView":1127 - * - * for i in range(ndim): - * if mslice.shape[i] > 1: # <<<<<<<<<<<<<< - * f_stride = mslice.strides[i] - * break - */ - } - } - __pyx_L7_break:; - - /* "View.MemoryView":1131 - * break - * - * if abs_py_ssize_t(c_stride) <= abs_py_ssize_t(f_stride): # <<<<<<<<<<<<<< - * return 'C' - * else: - */ - __pyx_t_2 = (abs_py_ssize_t(__pyx_v_c_stride) <= abs_py_ssize_t(__pyx_v_f_stride)); - if (__pyx_t_2) { - - /* "View.MemoryView":1132 - * - * if abs_py_ssize_t(c_stride) <= abs_py_ssize_t(f_stride): - * return 'C' # <<<<<<<<<<<<<< - * else: - * return 'F' - */ - __pyx_r = 'C'; - goto __pyx_L0; - - /* "View.MemoryView":1131 - * break - * - * if abs_py_ssize_t(c_stride) <= abs_py_ssize_t(f_stride): # <<<<<<<<<<<<<< - * return 'C' - * else: - */ - } - - /* "View.MemoryView":1134 - * return 'C' - * else: - * return 'F' # <<<<<<<<<<<<<< - * - * @cython.cdivision(True) - */ - /*else*/ { - __pyx_r = 'F'; - goto __pyx_L0; - } - - /* "View.MemoryView":1113 - * - * @cname('__pyx_get_best_slice_order') - * cdef char get_best_order(__Pyx_memviewslice *mslice, int ndim) noexcept nogil: # <<<<<<<<<<<<<< - * """ - * Figure out the best memory access order for a given slice. - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "View.MemoryView":1137 - * - * @cython.cdivision(True) - * cdef void _copy_strided_to_strided(char *src_data, Py_ssize_t *src_strides, # <<<<<<<<<<<<<< - * char *dst_data, Py_ssize_t *dst_strides, - * Py_ssize_t *src_shape, Py_ssize_t *dst_shape, - */ - -static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v_src_strides, char *__pyx_v_dst_data, Py_ssize_t *__pyx_v_dst_strides, Py_ssize_t *__pyx_v_src_shape, Py_ssize_t *__pyx_v_dst_shape, int __pyx_v_ndim, size_t __pyx_v_itemsize) { - CYTHON_UNUSED Py_ssize_t __pyx_v_i; - CYTHON_UNUSED Py_ssize_t __pyx_v_src_extent; - Py_ssize_t __pyx_v_dst_extent; - Py_ssize_t __pyx_v_src_stride; - Py_ssize_t __pyx_v_dst_stride; - int __pyx_t_1; - int __pyx_t_2; - Py_ssize_t __pyx_t_3; - Py_ssize_t __pyx_t_4; - Py_ssize_t __pyx_t_5; - - /* "View.MemoryView":1144 - * - * cdef Py_ssize_t i - * cdef Py_ssize_t src_extent = src_shape[0] # <<<<<<<<<<<<<< - * cdef Py_ssize_t dst_extent = dst_shape[0] - * cdef Py_ssize_t src_stride = src_strides[0] - */ - __pyx_v_src_extent = (__pyx_v_src_shape[0]); - - /* "View.MemoryView":1145 - * cdef Py_ssize_t i - * cdef Py_ssize_t src_extent = src_shape[0] - * cdef Py_ssize_t dst_extent = dst_shape[0] # <<<<<<<<<<<<<< - * cdef Py_ssize_t src_stride = src_strides[0] - * cdef Py_ssize_t dst_stride = dst_strides[0] - */ - __pyx_v_dst_extent = (__pyx_v_dst_shape[0]); - - /* "View.MemoryView":1146 - * cdef Py_ssize_t src_extent = src_shape[0] - * cdef Py_ssize_t dst_extent = dst_shape[0] - * cdef Py_ssize_t src_stride = src_strides[0] # <<<<<<<<<<<<<< - * cdef Py_ssize_t dst_stride = dst_strides[0] - * - */ - __pyx_v_src_stride = (__pyx_v_src_strides[0]); - - /* "View.MemoryView":1147 - * cdef Py_ssize_t dst_extent = dst_shape[0] - * cdef Py_ssize_t src_stride = src_strides[0] - * cdef Py_ssize_t dst_stride = dst_strides[0] # <<<<<<<<<<<<<< - * - * if ndim == 1: - */ - __pyx_v_dst_stride = (__pyx_v_dst_strides[0]); - - /* "View.MemoryView":1149 - * cdef Py_ssize_t dst_stride = dst_strides[0] - * - * if ndim == 1: # <<<<<<<<<<<<<< - * if (src_stride > 0 and dst_stride > 0 and - * src_stride == itemsize == dst_stride): - */ - __pyx_t_1 = (__pyx_v_ndim == 1); - if (__pyx_t_1) { - - /* "View.MemoryView":1150 - * - * if ndim == 1: - * if (src_stride > 0 and dst_stride > 0 and # <<<<<<<<<<<<<< - * src_stride == itemsize == dst_stride): - * memcpy(dst_data, src_data, itemsize * dst_extent) - */ - __pyx_t_2 = (__pyx_v_src_stride > 0); - if (__pyx_t_2) { - } else { - __pyx_t_1 = __pyx_t_2; - goto __pyx_L5_bool_binop_done; - } - __pyx_t_2 = (__pyx_v_dst_stride > 0); - if (__pyx_t_2) { - } else { - __pyx_t_1 = __pyx_t_2; - goto __pyx_L5_bool_binop_done; - } - - /* "View.MemoryView":1151 - * if ndim == 1: - * if (src_stride > 0 and dst_stride > 0 and - * src_stride == itemsize == dst_stride): # <<<<<<<<<<<<<< - * memcpy(dst_data, src_data, itemsize * dst_extent) - * else: - */ - __pyx_t_2 = (((size_t)__pyx_v_src_stride) == __pyx_v_itemsize); - if (__pyx_t_2) { - __pyx_t_2 = (__pyx_v_itemsize == ((size_t)__pyx_v_dst_stride)); - } - __pyx_t_1 = __pyx_t_2; - __pyx_L5_bool_binop_done:; - - /* "View.MemoryView":1150 - * - * if ndim == 1: - * if (src_stride > 0 and dst_stride > 0 and # <<<<<<<<<<<<<< - * src_stride == itemsize == dst_stride): - * memcpy(dst_data, src_data, itemsize * dst_extent) - */ - if (__pyx_t_1) { - - /* "View.MemoryView":1152 - * if (src_stride > 0 and dst_stride > 0 and - * src_stride == itemsize == dst_stride): - * memcpy(dst_data, src_data, itemsize * dst_extent) # <<<<<<<<<<<<<< - * else: - * for i in range(dst_extent): - */ - (void)(memcpy(__pyx_v_dst_data, __pyx_v_src_data, (__pyx_v_itemsize * __pyx_v_dst_extent))); - - /* "View.MemoryView":1150 - * - * if ndim == 1: - * if (src_stride > 0 and dst_stride > 0 and # <<<<<<<<<<<<<< - * src_stride == itemsize == dst_stride): - * memcpy(dst_data, src_data, itemsize * dst_extent) - */ - goto __pyx_L4; - } - - /* "View.MemoryView":1154 - * memcpy(dst_data, src_data, itemsize * dst_extent) - * else: - * for i in range(dst_extent): # <<<<<<<<<<<<<< - * memcpy(dst_data, src_data, itemsize) - * src_data += src_stride - */ - /*else*/ { - __pyx_t_3 = __pyx_v_dst_extent; - __pyx_t_4 = __pyx_t_3; - for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { - __pyx_v_i = __pyx_t_5; - - /* "View.MemoryView":1155 - * else: - * for i in range(dst_extent): - * memcpy(dst_data, src_data, itemsize) # <<<<<<<<<<<<<< - * src_data += src_stride - * dst_data += dst_stride - */ - (void)(memcpy(__pyx_v_dst_data, __pyx_v_src_data, __pyx_v_itemsize)); - - /* "View.MemoryView":1156 - * for i in range(dst_extent): - * memcpy(dst_data, src_data, itemsize) - * src_data += src_stride # <<<<<<<<<<<<<< - * dst_data += dst_stride - * else: - */ - __pyx_v_src_data = (__pyx_v_src_data + __pyx_v_src_stride); - - /* "View.MemoryView":1157 - * memcpy(dst_data, src_data, itemsize) - * src_data += src_stride - * dst_data += dst_stride # <<<<<<<<<<<<<< - * else: - * for i in range(dst_extent): - */ - __pyx_v_dst_data = (__pyx_v_dst_data + __pyx_v_dst_stride); - } - } - __pyx_L4:; - - /* "View.MemoryView":1149 - * cdef Py_ssize_t dst_stride = dst_strides[0] - * - * if ndim == 1: # <<<<<<<<<<<<<< - * if (src_stride > 0 and dst_stride > 0 and - * src_stride == itemsize == dst_stride): - */ - goto __pyx_L3; - } - - /* "View.MemoryView":1159 - * dst_data += dst_stride - * else: - * for i in range(dst_extent): # <<<<<<<<<<<<<< - * _copy_strided_to_strided(src_data, src_strides + 1, - * dst_data, dst_strides + 1, - */ - /*else*/ { - __pyx_t_3 = __pyx_v_dst_extent; - __pyx_t_4 = __pyx_t_3; - for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { - __pyx_v_i = __pyx_t_5; - - /* "View.MemoryView":1160 - * else: - * for i in range(dst_extent): - * _copy_strided_to_strided(src_data, src_strides + 1, # <<<<<<<<<<<<<< - * dst_data, dst_strides + 1, - * src_shape + 1, dst_shape + 1, - */ - _copy_strided_to_strided(__pyx_v_src_data, (__pyx_v_src_strides + 1), __pyx_v_dst_data, (__pyx_v_dst_strides + 1), (__pyx_v_src_shape + 1), (__pyx_v_dst_shape + 1), (__pyx_v_ndim - 1), __pyx_v_itemsize); - - /* "View.MemoryView":1164 - * src_shape + 1, dst_shape + 1, - * ndim - 1, itemsize) - * src_data += src_stride # <<<<<<<<<<<<<< - * dst_data += dst_stride - * - */ - __pyx_v_src_data = (__pyx_v_src_data + __pyx_v_src_stride); - - /* "View.MemoryView":1165 - * ndim - 1, itemsize) - * src_data += src_stride - * dst_data += dst_stride # <<<<<<<<<<<<<< - * - * cdef void copy_strided_to_strided(__Pyx_memviewslice *src, - */ - __pyx_v_dst_data = (__pyx_v_dst_data + __pyx_v_dst_stride); - } - } - __pyx_L3:; - - /* "View.MemoryView":1137 - * - * @cython.cdivision(True) - * cdef void _copy_strided_to_strided(char *src_data, Py_ssize_t *src_strides, # <<<<<<<<<<<<<< - * char *dst_data, Py_ssize_t *dst_strides, - * Py_ssize_t *src_shape, Py_ssize_t *dst_shape, - */ - - /* function exit code */ -} - -/* "View.MemoryView":1167 - * dst_data += dst_stride - * - * cdef void copy_strided_to_strided(__Pyx_memviewslice *src, # <<<<<<<<<<<<<< - * __Pyx_memviewslice *dst, - * int ndim, size_t itemsize) noexcept nogil: - */ - -static void copy_strided_to_strided(__Pyx_memviewslice *__pyx_v_src, __Pyx_memviewslice *__pyx_v_dst, int __pyx_v_ndim, size_t __pyx_v_itemsize) { - - /* "View.MemoryView":1170 - * __Pyx_memviewslice *dst, - * int ndim, size_t itemsize) noexcept nogil: - * _copy_strided_to_strided(src.data, src.strides, dst.data, dst.strides, # <<<<<<<<<<<<<< - * src.shape, dst.shape, ndim, itemsize) - * - */ - _copy_strided_to_strided(__pyx_v_src->data, __pyx_v_src->strides, __pyx_v_dst->data, __pyx_v_dst->strides, __pyx_v_src->shape, __pyx_v_dst->shape, __pyx_v_ndim, __pyx_v_itemsize); - - /* "View.MemoryView":1167 - * dst_data += dst_stride - * - * cdef void copy_strided_to_strided(__Pyx_memviewslice *src, # <<<<<<<<<<<<<< - * __Pyx_memviewslice *dst, - * int ndim, size_t itemsize) noexcept nogil: - */ - - /* function exit code */ -} - -/* "View.MemoryView":1174 - * - * @cname('__pyx_memoryview_slice_get_size') - * cdef Py_ssize_t slice_get_size(__Pyx_memviewslice *src, int ndim) noexcept nogil: # <<<<<<<<<<<<<< - * "Return the size of the memory occupied by the slice in number of bytes" - * cdef Py_ssize_t shape, size = src.memview.view.itemsize - */ - -static Py_ssize_t __pyx_memoryview_slice_get_size(__Pyx_memviewslice *__pyx_v_src, int __pyx_v_ndim) { - Py_ssize_t __pyx_v_shape; - Py_ssize_t __pyx_v_size; - Py_ssize_t __pyx_r; - Py_ssize_t __pyx_t_1; - Py_ssize_t *__pyx_t_2; - Py_ssize_t *__pyx_t_3; - Py_ssize_t *__pyx_t_4; - - /* "View.MemoryView":1176 - * cdef Py_ssize_t slice_get_size(__Pyx_memviewslice *src, int ndim) noexcept nogil: - * "Return the size of the memory occupied by the slice in number of bytes" - * cdef Py_ssize_t shape, size = src.memview.view.itemsize # <<<<<<<<<<<<<< - * - * for shape in src.shape[:ndim]: - */ - __pyx_t_1 = __pyx_v_src->memview->view.itemsize; - __pyx_v_size = __pyx_t_1; - - /* "View.MemoryView":1178 - * cdef Py_ssize_t shape, size = src.memview.view.itemsize - * - * for shape in src.shape[:ndim]: # <<<<<<<<<<<<<< - * size *= shape - * - */ - __pyx_t_3 = (__pyx_v_src->shape + __pyx_v_ndim); - for (__pyx_t_4 = __pyx_v_src->shape; __pyx_t_4 < __pyx_t_3; __pyx_t_4++) { - __pyx_t_2 = __pyx_t_4; - __pyx_v_shape = (__pyx_t_2[0]); - - /* "View.MemoryView":1179 - * - * for shape in src.shape[:ndim]: - * size *= shape # <<<<<<<<<<<<<< - * - * return size - */ - __pyx_v_size = (__pyx_v_size * __pyx_v_shape); - } - - /* "View.MemoryView":1181 - * size *= shape - * - * return size # <<<<<<<<<<<<<< - * - * @cname('__pyx_fill_contig_strides_array') - */ - __pyx_r = __pyx_v_size; - goto __pyx_L0; - - /* "View.MemoryView":1174 - * - * @cname('__pyx_memoryview_slice_get_size') - * cdef Py_ssize_t slice_get_size(__Pyx_memviewslice *src, int ndim) noexcept nogil: # <<<<<<<<<<<<<< - * "Return the size of the memory occupied by the slice in number of bytes" - * cdef Py_ssize_t shape, size = src.memview.view.itemsize - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "View.MemoryView":1184 - * - * @cname('__pyx_fill_contig_strides_array') - * cdef Py_ssize_t fill_contig_strides_array( # <<<<<<<<<<<<<< - * Py_ssize_t *shape, Py_ssize_t *strides, Py_ssize_t stride, - * int ndim, char order) noexcept nogil: - */ - -static Py_ssize_t __pyx_fill_contig_strides_array(Py_ssize_t *__pyx_v_shape, Py_ssize_t *__pyx_v_strides, Py_ssize_t __pyx_v_stride, int __pyx_v_ndim, char __pyx_v_order) { - int __pyx_v_idx; - Py_ssize_t __pyx_r; - int __pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - - /* "View.MemoryView":1193 - * cdef int idx - * - * if order == 'F': # <<<<<<<<<<<<<< - * for idx in range(ndim): - * strides[idx] = stride - */ - __pyx_t_1 = (__pyx_v_order == 'F'); - if (__pyx_t_1) { - - /* "View.MemoryView":1194 - * - * if order == 'F': - * for idx in range(ndim): # <<<<<<<<<<<<<< - * strides[idx] = stride - * stride *= shape[idx] - */ - __pyx_t_2 = __pyx_v_ndim; - __pyx_t_3 = __pyx_t_2; - for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { - __pyx_v_idx = __pyx_t_4; - - /* "View.MemoryView":1195 - * if order == 'F': - * for idx in range(ndim): - * strides[idx] = stride # <<<<<<<<<<<<<< - * stride *= shape[idx] - * else: - */ - (__pyx_v_strides[__pyx_v_idx]) = __pyx_v_stride; - - /* "View.MemoryView":1196 - * for idx in range(ndim): - * strides[idx] = stride - * stride *= shape[idx] # <<<<<<<<<<<<<< - * else: - * for idx in range(ndim - 1, -1, -1): - */ - __pyx_v_stride = (__pyx_v_stride * (__pyx_v_shape[__pyx_v_idx])); - } - - /* "View.MemoryView":1193 - * cdef int idx - * - * if order == 'F': # <<<<<<<<<<<<<< - * for idx in range(ndim): - * strides[idx] = stride - */ - goto __pyx_L3; - } - - /* "View.MemoryView":1198 - * stride *= shape[idx] - * else: - * for idx in range(ndim - 1, -1, -1): # <<<<<<<<<<<<<< - * strides[idx] = stride - * stride *= shape[idx] - */ - /*else*/ { - for (__pyx_t_2 = (__pyx_v_ndim - 1); __pyx_t_2 > -1; __pyx_t_2-=1) { - __pyx_v_idx = __pyx_t_2; - - /* "View.MemoryView":1199 - * else: - * for idx in range(ndim - 1, -1, -1): - * strides[idx] = stride # <<<<<<<<<<<<<< - * stride *= shape[idx] - * - */ - (__pyx_v_strides[__pyx_v_idx]) = __pyx_v_stride; - - /* "View.MemoryView":1200 - * for idx in range(ndim - 1, -1, -1): - * strides[idx] = stride - * stride *= shape[idx] # <<<<<<<<<<<<<< - * - * return stride - */ - __pyx_v_stride = (__pyx_v_stride * (__pyx_v_shape[__pyx_v_idx])); - } - } - __pyx_L3:; - - /* "View.MemoryView":1202 - * stride *= shape[idx] - * - * return stride # <<<<<<<<<<<<<< - * - * @cname('__pyx_memoryview_copy_data_to_temp') - */ - __pyx_r = __pyx_v_stride; - goto __pyx_L0; - - /* "View.MemoryView":1184 - * - * @cname('__pyx_fill_contig_strides_array') - * cdef Py_ssize_t fill_contig_strides_array( # <<<<<<<<<<<<<< - * Py_ssize_t *shape, Py_ssize_t *strides, Py_ssize_t stride, - * int ndim, char order) noexcept nogil: - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "View.MemoryView":1205 - * - * @cname('__pyx_memoryview_copy_data_to_temp') - * cdef void *copy_data_to_temp(__Pyx_memviewslice *src, # <<<<<<<<<<<<<< - * __Pyx_memviewslice *tmpslice, - * char order, - */ - -static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, __Pyx_memviewslice *__pyx_v_tmpslice, char __pyx_v_order, int __pyx_v_ndim) { - int __pyx_v_i; - void *__pyx_v_result; - size_t __pyx_v_itemsize; - size_t __pyx_v_size; - void *__pyx_r; - __Pyx_RefNannyDeclarations - Py_ssize_t __pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - struct __pyx_memoryview_obj *__pyx_t_4; - int __pyx_t_5; - int __pyx_t_6; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - #ifdef WITH_THREAD - PyGILState_STATE __pyx_gilstate_save; - #endif - __Pyx_RefNannySetupContext("copy_data_to_temp", 1); - - /* "View.MemoryView":1216 - * cdef void *result - * - * cdef size_t itemsize = src.memview.view.itemsize # <<<<<<<<<<<<<< - * cdef size_t size = slice_get_size(src, ndim) - * - */ - __pyx_t_1 = __pyx_v_src->memview->view.itemsize; - __pyx_v_itemsize = __pyx_t_1; - - /* "View.MemoryView":1217 - * - * cdef size_t itemsize = src.memview.view.itemsize - * cdef size_t size = slice_get_size(src, ndim) # <<<<<<<<<<<<<< - * - * result = malloc(size) - */ - __pyx_v_size = __pyx_memoryview_slice_get_size(__pyx_v_src, __pyx_v_ndim); - - /* "View.MemoryView":1219 - * cdef size_t size = slice_get_size(src, ndim) - * - * result = malloc(size) # <<<<<<<<<<<<<< - * if not result: - * _err_no_memory() - */ - __pyx_v_result = malloc(__pyx_v_size); - - /* "View.MemoryView":1220 - * - * result = malloc(size) - * if not result: # <<<<<<<<<<<<<< - * _err_no_memory() - * - */ - __pyx_t_2 = (!(__pyx_v_result != 0)); - if (__pyx_t_2) { - - /* "View.MemoryView":1221 - * result = malloc(size) - * if not result: - * _err_no_memory() # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_3 = __pyx_memoryview_err_no_memory(); if (unlikely(__pyx_t_3 == ((int)-1))) __PYX_ERR(1, 1221, __pyx_L1_error) - - /* "View.MemoryView":1220 - * - * result = malloc(size) - * if not result: # <<<<<<<<<<<<<< - * _err_no_memory() - * - */ - } - - /* "View.MemoryView":1224 - * - * - * tmpslice.data = result # <<<<<<<<<<<<<< - * tmpslice.memview = src.memview - * for i in range(ndim): - */ - __pyx_v_tmpslice->data = ((char *)__pyx_v_result); - - /* "View.MemoryView":1225 - * - * tmpslice.data = result - * tmpslice.memview = src.memview # <<<<<<<<<<<<<< - * for i in range(ndim): - * tmpslice.shape[i] = src.shape[i] - */ - __pyx_t_4 = __pyx_v_src->memview; - __pyx_v_tmpslice->memview = __pyx_t_4; - - /* "View.MemoryView":1226 - * tmpslice.data = result - * tmpslice.memview = src.memview - * for i in range(ndim): # <<<<<<<<<<<<<< - * tmpslice.shape[i] = src.shape[i] - * tmpslice.suboffsets[i] = -1 - */ - __pyx_t_3 = __pyx_v_ndim; - __pyx_t_5 = __pyx_t_3; - for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) { - __pyx_v_i = __pyx_t_6; - - /* "View.MemoryView":1227 - * tmpslice.memview = src.memview - * for i in range(ndim): - * tmpslice.shape[i] = src.shape[i] # <<<<<<<<<<<<<< - * tmpslice.suboffsets[i] = -1 - * - */ - (__pyx_v_tmpslice->shape[__pyx_v_i]) = (__pyx_v_src->shape[__pyx_v_i]); - - /* "View.MemoryView":1228 - * for i in range(ndim): - * tmpslice.shape[i] = src.shape[i] - * tmpslice.suboffsets[i] = -1 # <<<<<<<<<<<<<< - * - * fill_contig_strides_array(&tmpslice.shape[0], &tmpslice.strides[0], itemsize, ndim, order) - */ - (__pyx_v_tmpslice->suboffsets[__pyx_v_i]) = -1L; - } - - /* "View.MemoryView":1230 - * tmpslice.suboffsets[i] = -1 - * - * fill_contig_strides_array(&tmpslice.shape[0], &tmpslice.strides[0], itemsize, ndim, order) # <<<<<<<<<<<<<< - * - * - */ - (void)(__pyx_fill_contig_strides_array((&(__pyx_v_tmpslice->shape[0])), (&(__pyx_v_tmpslice->strides[0])), __pyx_v_itemsize, __pyx_v_ndim, __pyx_v_order)); - - /* "View.MemoryView":1233 - * - * - * for i in range(ndim): # <<<<<<<<<<<<<< - * if tmpslice.shape[i] == 1: - * tmpslice.strides[i] = 0 - */ - __pyx_t_3 = __pyx_v_ndim; - __pyx_t_5 = __pyx_t_3; - for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) { - __pyx_v_i = __pyx_t_6; - - /* "View.MemoryView":1234 - * - * for i in range(ndim): - * if tmpslice.shape[i] == 1: # <<<<<<<<<<<<<< - * tmpslice.strides[i] = 0 - * - */ - __pyx_t_2 = ((__pyx_v_tmpslice->shape[__pyx_v_i]) == 1); - if (__pyx_t_2) { - - /* "View.MemoryView":1235 - * for i in range(ndim): - * if tmpslice.shape[i] == 1: - * tmpslice.strides[i] = 0 # <<<<<<<<<<<<<< - * - * if slice_is_contig(src[0], order, ndim): - */ - (__pyx_v_tmpslice->strides[__pyx_v_i]) = 0; - - /* "View.MemoryView":1234 - * - * for i in range(ndim): - * if tmpslice.shape[i] == 1: # <<<<<<<<<<<<<< - * tmpslice.strides[i] = 0 - * - */ - } - } - - /* "View.MemoryView":1237 - * tmpslice.strides[i] = 0 - * - * if slice_is_contig(src[0], order, ndim): # <<<<<<<<<<<<<< - * memcpy(result, src.data, size) - * else: - */ - __pyx_t_2 = __pyx_memviewslice_is_contig((__pyx_v_src[0]), __pyx_v_order, __pyx_v_ndim); - if (__pyx_t_2) { - - /* "View.MemoryView":1238 - * - * if slice_is_contig(src[0], order, ndim): - * memcpy(result, src.data, size) # <<<<<<<<<<<<<< - * else: - * copy_strided_to_strided(src, tmpslice, ndim, itemsize) - */ - (void)(memcpy(__pyx_v_result, __pyx_v_src->data, __pyx_v_size)); - - /* "View.MemoryView":1237 - * tmpslice.strides[i] = 0 - * - * if slice_is_contig(src[0], order, ndim): # <<<<<<<<<<<<<< - * memcpy(result, src.data, size) - * else: - */ - goto __pyx_L9; - } - - /* "View.MemoryView":1240 - * memcpy(result, src.data, size) - * else: - * copy_strided_to_strided(src, tmpslice, ndim, itemsize) # <<<<<<<<<<<<<< - * - * return result - */ - /*else*/ { - copy_strided_to_strided(__pyx_v_src, __pyx_v_tmpslice, __pyx_v_ndim, __pyx_v_itemsize); - } - __pyx_L9:; - - /* "View.MemoryView":1242 - * copy_strided_to_strided(src, tmpslice, ndim, itemsize) - * - * return result # <<<<<<<<<<<<<< - * - * - */ - __pyx_r = __pyx_v_result; - goto __pyx_L0; - - /* "View.MemoryView":1205 - * - * @cname('__pyx_memoryview_copy_data_to_temp') - * cdef void *copy_data_to_temp(__Pyx_memviewslice *src, # <<<<<<<<<<<<<< - * __Pyx_memviewslice *tmpslice, - * char order, - */ - - /* function exit code */ - __pyx_L1_error:; - #ifdef WITH_THREAD - __pyx_gilstate_save = __Pyx_PyGILState_Ensure(); - #endif - __Pyx_AddTraceback("View.MemoryView.copy_data_to_temp", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - #ifdef WITH_THREAD - __Pyx_PyGILState_Release(__pyx_gilstate_save); - #endif - __pyx_L0:; - __Pyx_RefNannyFinishContextNogil() - return __pyx_r; -} - -/* "View.MemoryView":1247 - * - * @cname('__pyx_memoryview_err_extents') - * cdef int _err_extents(int i, Py_ssize_t extent1, # <<<<<<<<<<<<<< - * Py_ssize_t extent2) except -1 with gil: - * raise ValueError, f"got differing extents in dimension {i} (got {extent1} and {extent2})" - */ - -static int __pyx_memoryview_err_extents(int __pyx_v_i, Py_ssize_t __pyx_v_extent1, Py_ssize_t __pyx_v_extent2) { - int __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - Py_ssize_t __pyx_t_2; - Py_UCS4 __pyx_t_3; - PyObject *__pyx_t_4 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - #ifdef WITH_THREAD - PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure(); - #endif - __Pyx_RefNannySetupContext("_err_extents", 0); - - /* "View.MemoryView":1249 - * cdef int _err_extents(int i, Py_ssize_t extent1, - * Py_ssize_t extent2) except -1 with gil: - * raise ValueError, f"got differing extents in dimension {i} (got {extent1} and {extent2})" # <<<<<<<<<<<<<< - * - * @cname('__pyx_memoryview_err_dim') - */ - __pyx_t_1 = PyTuple_New(7); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 1249, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = 0; - __pyx_t_3 = 127; - __Pyx_INCREF(__pyx_kp_u_got_differing_extents_in_dimensi); - __pyx_t_2 += 35; - __Pyx_GIVEREF(__pyx_kp_u_got_differing_extents_in_dimensi); - PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_kp_u_got_differing_extents_in_dimensi); - __pyx_t_4 = __Pyx_PyUnicode_From_int(__pyx_v_i, 0, ' ', 'd'); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 1249, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_2 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_4); - __Pyx_GIVEREF(__pyx_t_4); - PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_t_4); - __pyx_t_4 = 0; - __Pyx_INCREF(__pyx_kp_u_got); - __pyx_t_2 += 6; - __Pyx_GIVEREF(__pyx_kp_u_got); - PyTuple_SET_ITEM(__pyx_t_1, 2, __pyx_kp_u_got); - __pyx_t_4 = __Pyx_PyUnicode_From_Py_ssize_t(__pyx_v_extent1, 0, ' ', 'd'); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 1249, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_2 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_4); - __Pyx_GIVEREF(__pyx_t_4); - PyTuple_SET_ITEM(__pyx_t_1, 3, __pyx_t_4); - __pyx_t_4 = 0; - __Pyx_INCREF(__pyx_kp_u_and); - __pyx_t_2 += 5; - __Pyx_GIVEREF(__pyx_kp_u_and); - PyTuple_SET_ITEM(__pyx_t_1, 4, __pyx_kp_u_and); - __pyx_t_4 = __Pyx_PyUnicode_From_Py_ssize_t(__pyx_v_extent2, 0, ' ', 'd'); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 1249, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_2 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_4); - __Pyx_GIVEREF(__pyx_t_4); - PyTuple_SET_ITEM(__pyx_t_1, 5, __pyx_t_4); - __pyx_t_4 = 0; - __Pyx_INCREF(__pyx_kp_u__7); - __pyx_t_2 += 1; - __Pyx_GIVEREF(__pyx_kp_u__7); - PyTuple_SET_ITEM(__pyx_t_1, 6, __pyx_kp_u__7); - __pyx_t_4 = __Pyx_PyUnicode_Join(__pyx_t_1, 7, __pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 1249, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_Raise(__pyx_builtin_ValueError, __pyx_t_4, 0, 0); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __PYX_ERR(1, 1249, __pyx_L1_error) - - /* "View.MemoryView":1247 - * - * @cname('__pyx_memoryview_err_extents') - * cdef int _err_extents(int i, Py_ssize_t extent1, # <<<<<<<<<<<<<< - * Py_ssize_t extent2) except -1 with gil: - * raise ValueError, f"got differing extents in dimension {i} (got {extent1} and {extent2})" - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_AddTraceback("View.MemoryView._err_extents", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __Pyx_RefNannyFinishContext(); - #ifdef WITH_THREAD - __Pyx_PyGILState_Release(__pyx_gilstate_save); - #endif - return __pyx_r; -} - -/* "View.MemoryView":1252 - * - * @cname('__pyx_memoryview_err_dim') - * cdef int _err_dim(PyObject *error, str msg, int dim) except -1 with gil: # <<<<<<<<<<<<<< - * raise error, msg % dim - * - */ - -static int __pyx_memoryview_err_dim(PyObject *__pyx_v_error, PyObject *__pyx_v_msg, int __pyx_v_dim) { - int __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - #ifdef WITH_THREAD - PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure(); - #endif - __Pyx_RefNannySetupContext("_err_dim", 0); - __Pyx_INCREF(__pyx_v_msg); - - /* "View.MemoryView":1253 - * @cname('__pyx_memoryview_err_dim') - * cdef int _err_dim(PyObject *error, str msg, int dim) except -1 with gil: - * raise error, msg % dim # <<<<<<<<<<<<<< - * - * @cname('__pyx_memoryview_err') - */ - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_dim); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 1253, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyString_FormatSafe(__pyx_v_msg, __pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1253, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_Raise(((PyObject *)__pyx_v_error), __pyx_t_2, 0, 0); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __PYX_ERR(1, 1253, __pyx_L1_error) - - /* "View.MemoryView":1252 - * - * @cname('__pyx_memoryview_err_dim') - * cdef int _err_dim(PyObject *error, str msg, int dim) except -1 with gil: # <<<<<<<<<<<<<< - * raise error, msg % dim - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("View.MemoryView._err_dim", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __Pyx_XDECREF(__pyx_v_msg); - __Pyx_RefNannyFinishContext(); - #ifdef WITH_THREAD - __Pyx_PyGILState_Release(__pyx_gilstate_save); - #endif - return __pyx_r; -} - -/* "View.MemoryView":1256 - * - * @cname('__pyx_memoryview_err') - * cdef int _err(PyObject *error, str msg) except -1 with gil: # <<<<<<<<<<<<<< - * raise error, msg - * - */ - -static int __pyx_memoryview_err(PyObject *__pyx_v_error, PyObject *__pyx_v_msg) { - int __pyx_r; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - #ifdef WITH_THREAD - PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure(); - #endif - __Pyx_RefNannySetupContext("_err", 0); - __Pyx_INCREF(__pyx_v_msg); - - /* "View.MemoryView":1257 - * @cname('__pyx_memoryview_err') - * cdef int _err(PyObject *error, str msg) except -1 with gil: - * raise error, msg # <<<<<<<<<<<<<< - * - * @cname('__pyx_memoryview_err_no_memory') - */ - __Pyx_Raise(((PyObject *)__pyx_v_error), __pyx_v_msg, 0, 0); - __PYX_ERR(1, 1257, __pyx_L1_error) - - /* "View.MemoryView":1256 - * - * @cname('__pyx_memoryview_err') - * cdef int _err(PyObject *error, str msg) except -1 with gil: # <<<<<<<<<<<<<< - * raise error, msg - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("View.MemoryView._err", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __Pyx_XDECREF(__pyx_v_msg); - __Pyx_RefNannyFinishContext(); - #ifdef WITH_THREAD - __Pyx_PyGILState_Release(__pyx_gilstate_save); - #endif - return __pyx_r; -} - -/* "View.MemoryView":1260 - * - * @cname('__pyx_memoryview_err_no_memory') - * cdef int _err_no_memory() except -1 with gil: # <<<<<<<<<<<<<< - * raise MemoryError - * - */ - -static int __pyx_memoryview_err_no_memory(void) { - int __pyx_r; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - #ifdef WITH_THREAD - PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure(); - #endif - __Pyx_RefNannySetupContext("_err_no_memory", 0); - - /* "View.MemoryView":1261 - * @cname('__pyx_memoryview_err_no_memory') - * cdef int _err_no_memory() except -1 with gil: - * raise MemoryError # <<<<<<<<<<<<<< - * - * - */ - PyErr_NoMemory(); __PYX_ERR(1, 1261, __pyx_L1_error) - - /* "View.MemoryView":1260 - * - * @cname('__pyx_memoryview_err_no_memory') - * cdef int _err_no_memory() except -1 with gil: # <<<<<<<<<<<<<< - * raise MemoryError - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("View.MemoryView._err_no_memory", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __Pyx_RefNannyFinishContext(); - #ifdef WITH_THREAD - __Pyx_PyGILState_Release(__pyx_gilstate_save); - #endif - return __pyx_r; -} - -/* "View.MemoryView":1265 - * - * @cname('__pyx_memoryview_copy_contents') - * cdef int memoryview_copy_contents(__Pyx_memviewslice src, # <<<<<<<<<<<<<< - * __Pyx_memviewslice dst, - * int src_ndim, int dst_ndim, - */ - -static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_memviewslice __pyx_v_dst, int __pyx_v_src_ndim, int __pyx_v_dst_ndim, int __pyx_v_dtype_is_object) { - void *__pyx_v_tmpdata; - size_t __pyx_v_itemsize; - int __pyx_v_i; - char __pyx_v_order; - int __pyx_v_broadcasting; - int __pyx_v_direct_copy; - __Pyx_memviewslice __pyx_v_tmp; - int __pyx_v_ndim; - int __pyx_r; - __Pyx_RefNannyDeclarations - Py_ssize_t __pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - int __pyx_t_5; - int __pyx_t_6; - void *__pyx_t_7; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - #ifdef WITH_THREAD - PyGILState_STATE __pyx_gilstate_save; - #endif - __Pyx_RefNannySetupContext("memoryview_copy_contents", 1); - - /* "View.MemoryView":1273 - * Check for overlapping memory and verify the shapes. - * """ - * cdef void *tmpdata = NULL # <<<<<<<<<<<<<< - * cdef size_t itemsize = src.memview.view.itemsize - * cdef int i - */ - __pyx_v_tmpdata = NULL; - - /* "View.MemoryView":1274 - * """ - * cdef void *tmpdata = NULL - * cdef size_t itemsize = src.memview.view.itemsize # <<<<<<<<<<<<<< - * cdef int i - * cdef char order = get_best_order(&src, src_ndim) - */ - __pyx_t_1 = __pyx_v_src.memview->view.itemsize; - __pyx_v_itemsize = __pyx_t_1; - - /* "View.MemoryView":1276 - * cdef size_t itemsize = src.memview.view.itemsize - * cdef int i - * cdef char order = get_best_order(&src, src_ndim) # <<<<<<<<<<<<<< - * cdef bint broadcasting = False - * cdef bint direct_copy = False - */ - __pyx_v_order = __pyx_get_best_slice_order((&__pyx_v_src), __pyx_v_src_ndim); - - /* "View.MemoryView":1277 - * cdef int i - * cdef char order = get_best_order(&src, src_ndim) - * cdef bint broadcasting = False # <<<<<<<<<<<<<< - * cdef bint direct_copy = False - * cdef __Pyx_memviewslice tmp - */ - __pyx_v_broadcasting = 0; - - /* "View.MemoryView":1278 - * cdef char order = get_best_order(&src, src_ndim) - * cdef bint broadcasting = False - * cdef bint direct_copy = False # <<<<<<<<<<<<<< - * cdef __Pyx_memviewslice tmp - * - */ - __pyx_v_direct_copy = 0; - - /* "View.MemoryView":1281 - * cdef __Pyx_memviewslice tmp - * - * if src_ndim < dst_ndim: # <<<<<<<<<<<<<< - * broadcast_leading(&src, src_ndim, dst_ndim) - * elif dst_ndim < src_ndim: - */ - __pyx_t_2 = (__pyx_v_src_ndim < __pyx_v_dst_ndim); - if (__pyx_t_2) { - - /* "View.MemoryView":1282 - * - * if src_ndim < dst_ndim: - * broadcast_leading(&src, src_ndim, dst_ndim) # <<<<<<<<<<<<<< - * elif dst_ndim < src_ndim: - * broadcast_leading(&dst, dst_ndim, src_ndim) - */ - __pyx_memoryview_broadcast_leading((&__pyx_v_src), __pyx_v_src_ndim, __pyx_v_dst_ndim); - - /* "View.MemoryView":1281 - * cdef __Pyx_memviewslice tmp - * - * if src_ndim < dst_ndim: # <<<<<<<<<<<<<< - * broadcast_leading(&src, src_ndim, dst_ndim) - * elif dst_ndim < src_ndim: - */ - goto __pyx_L3; - } - - /* "View.MemoryView":1283 - * if src_ndim < dst_ndim: - * broadcast_leading(&src, src_ndim, dst_ndim) - * elif dst_ndim < src_ndim: # <<<<<<<<<<<<<< - * broadcast_leading(&dst, dst_ndim, src_ndim) - * - */ - __pyx_t_2 = (__pyx_v_dst_ndim < __pyx_v_src_ndim); - if (__pyx_t_2) { - - /* "View.MemoryView":1284 - * broadcast_leading(&src, src_ndim, dst_ndim) - * elif dst_ndim < src_ndim: - * broadcast_leading(&dst, dst_ndim, src_ndim) # <<<<<<<<<<<<<< - * - * cdef int ndim = max(src_ndim, dst_ndim) - */ - __pyx_memoryview_broadcast_leading((&__pyx_v_dst), __pyx_v_dst_ndim, __pyx_v_src_ndim); - - /* "View.MemoryView":1283 - * if src_ndim < dst_ndim: - * broadcast_leading(&src, src_ndim, dst_ndim) - * elif dst_ndim < src_ndim: # <<<<<<<<<<<<<< - * broadcast_leading(&dst, dst_ndim, src_ndim) - * - */ - } - __pyx_L3:; - - /* "View.MemoryView":1286 - * broadcast_leading(&dst, dst_ndim, src_ndim) - * - * cdef int ndim = max(src_ndim, dst_ndim) # <<<<<<<<<<<<<< - * - * for i in range(ndim): - */ - __pyx_t_3 = __pyx_v_dst_ndim; - __pyx_t_4 = __pyx_v_src_ndim; - __pyx_t_2 = (__pyx_t_3 > __pyx_t_4); - if (__pyx_t_2) { - __pyx_t_5 = __pyx_t_3; - } else { - __pyx_t_5 = __pyx_t_4; - } - __pyx_v_ndim = __pyx_t_5; - - /* "View.MemoryView":1288 - * cdef int ndim = max(src_ndim, dst_ndim) - * - * for i in range(ndim): # <<<<<<<<<<<<<< - * if src.shape[i] != dst.shape[i]: - * if src.shape[i] == 1: - */ - __pyx_t_5 = __pyx_v_ndim; - __pyx_t_3 = __pyx_t_5; - for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { - __pyx_v_i = __pyx_t_4; - - /* "View.MemoryView":1289 - * - * for i in range(ndim): - * if src.shape[i] != dst.shape[i]: # <<<<<<<<<<<<<< - * if src.shape[i] == 1: - * broadcasting = True - */ - __pyx_t_2 = ((__pyx_v_src.shape[__pyx_v_i]) != (__pyx_v_dst.shape[__pyx_v_i])); - if (__pyx_t_2) { - - /* "View.MemoryView":1290 - * for i in range(ndim): - * if src.shape[i] != dst.shape[i]: - * if src.shape[i] == 1: # <<<<<<<<<<<<<< - * broadcasting = True - * src.strides[i] = 0 - */ - __pyx_t_2 = ((__pyx_v_src.shape[__pyx_v_i]) == 1); - if (__pyx_t_2) { - - /* "View.MemoryView":1291 - * if src.shape[i] != dst.shape[i]: - * if src.shape[i] == 1: - * broadcasting = True # <<<<<<<<<<<<<< - * src.strides[i] = 0 - * else: - */ - __pyx_v_broadcasting = 1; - - /* "View.MemoryView":1292 - * if src.shape[i] == 1: - * broadcasting = True - * src.strides[i] = 0 # <<<<<<<<<<<<<< - * else: - * _err_extents(i, dst.shape[i], src.shape[i]) - */ - (__pyx_v_src.strides[__pyx_v_i]) = 0; - - /* "View.MemoryView":1290 - * for i in range(ndim): - * if src.shape[i] != dst.shape[i]: - * if src.shape[i] == 1: # <<<<<<<<<<<<<< - * broadcasting = True - * src.strides[i] = 0 - */ - goto __pyx_L7; - } - - /* "View.MemoryView":1294 - * src.strides[i] = 0 - * else: - * _err_extents(i, dst.shape[i], src.shape[i]) # <<<<<<<<<<<<<< - * - * if src.suboffsets[i] >= 0: - */ - /*else*/ { - __pyx_t_6 = __pyx_memoryview_err_extents(__pyx_v_i, (__pyx_v_dst.shape[__pyx_v_i]), (__pyx_v_src.shape[__pyx_v_i])); if (unlikely(__pyx_t_6 == ((int)-1))) __PYX_ERR(1, 1294, __pyx_L1_error) - } - __pyx_L7:; - - /* "View.MemoryView":1289 - * - * for i in range(ndim): - * if src.shape[i] != dst.shape[i]: # <<<<<<<<<<<<<< - * if src.shape[i] == 1: - * broadcasting = True - */ - } - - /* "View.MemoryView":1296 - * _err_extents(i, dst.shape[i], src.shape[i]) - * - * if src.suboffsets[i] >= 0: # <<<<<<<<<<<<<< - * _err_dim(PyExc_ValueError, "Dimension %d is not direct", i) - * - */ - __pyx_t_2 = ((__pyx_v_src.suboffsets[__pyx_v_i]) >= 0); - if (__pyx_t_2) { - - /* "View.MemoryView":1297 - * - * if src.suboffsets[i] >= 0: - * _err_dim(PyExc_ValueError, "Dimension %d is not direct", i) # <<<<<<<<<<<<<< - * - * if slices_overlap(&src, &dst, ndim, itemsize): - */ - __pyx_t_6 = __pyx_memoryview_err_dim(PyExc_ValueError, __pyx_kp_s_Dimension_d_is_not_direct, __pyx_v_i); if (unlikely(__pyx_t_6 == ((int)-1))) __PYX_ERR(1, 1297, __pyx_L1_error) - - /* "View.MemoryView":1296 - * _err_extents(i, dst.shape[i], src.shape[i]) - * - * if src.suboffsets[i] >= 0: # <<<<<<<<<<<<<< - * _err_dim(PyExc_ValueError, "Dimension %d is not direct", i) - * - */ - } - } - - /* "View.MemoryView":1299 - * _err_dim(PyExc_ValueError, "Dimension %d is not direct", i) - * - * if slices_overlap(&src, &dst, ndim, itemsize): # <<<<<<<<<<<<<< - * - * if not slice_is_contig(src, order, ndim): - */ - __pyx_t_2 = __pyx_slices_overlap((&__pyx_v_src), (&__pyx_v_dst), __pyx_v_ndim, __pyx_v_itemsize); - if (__pyx_t_2) { - - /* "View.MemoryView":1301 - * if slices_overlap(&src, &dst, ndim, itemsize): - * - * if not slice_is_contig(src, order, ndim): # <<<<<<<<<<<<<< - * order = get_best_order(&dst, ndim) - * - */ - __pyx_t_2 = (!__pyx_memviewslice_is_contig(__pyx_v_src, __pyx_v_order, __pyx_v_ndim)); - if (__pyx_t_2) { - - /* "View.MemoryView":1302 - * - * if not slice_is_contig(src, order, ndim): - * order = get_best_order(&dst, ndim) # <<<<<<<<<<<<<< - * - * tmpdata = copy_data_to_temp(&src, &tmp, order, ndim) - */ - __pyx_v_order = __pyx_get_best_slice_order((&__pyx_v_dst), __pyx_v_ndim); - - /* "View.MemoryView":1301 - * if slices_overlap(&src, &dst, ndim, itemsize): - * - * if not slice_is_contig(src, order, ndim): # <<<<<<<<<<<<<< - * order = get_best_order(&dst, ndim) - * - */ - } - - /* "View.MemoryView":1304 - * order = get_best_order(&dst, ndim) - * - * tmpdata = copy_data_to_temp(&src, &tmp, order, ndim) # <<<<<<<<<<<<<< - * src = tmp - * - */ - __pyx_t_7 = __pyx_memoryview_copy_data_to_temp((&__pyx_v_src), (&__pyx_v_tmp), __pyx_v_order, __pyx_v_ndim); if (unlikely(__pyx_t_7 == ((void *)NULL))) __PYX_ERR(1, 1304, __pyx_L1_error) - __pyx_v_tmpdata = __pyx_t_7; - - /* "View.MemoryView":1305 - * - * tmpdata = copy_data_to_temp(&src, &tmp, order, ndim) - * src = tmp # <<<<<<<<<<<<<< - * - * if not broadcasting: - */ - __pyx_v_src = __pyx_v_tmp; - - /* "View.MemoryView":1299 - * _err_dim(PyExc_ValueError, "Dimension %d is not direct", i) - * - * if slices_overlap(&src, &dst, ndim, itemsize): # <<<<<<<<<<<<<< - * - * if not slice_is_contig(src, order, ndim): - */ - } - - /* "View.MemoryView":1307 - * src = tmp - * - * if not broadcasting: # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_2 = (!__pyx_v_broadcasting); - if (__pyx_t_2) { - - /* "View.MemoryView":1310 - * - * - * if slice_is_contig(src, 'C', ndim): # <<<<<<<<<<<<<< - * direct_copy = slice_is_contig(dst, 'C', ndim) - * elif slice_is_contig(src, 'F', ndim): - */ - __pyx_t_2 = __pyx_memviewslice_is_contig(__pyx_v_src, 'C', __pyx_v_ndim); - if (__pyx_t_2) { - - /* "View.MemoryView":1311 - * - * if slice_is_contig(src, 'C', ndim): - * direct_copy = slice_is_contig(dst, 'C', ndim) # <<<<<<<<<<<<<< - * elif slice_is_contig(src, 'F', ndim): - * direct_copy = slice_is_contig(dst, 'F', ndim) - */ - __pyx_v_direct_copy = __pyx_memviewslice_is_contig(__pyx_v_dst, 'C', __pyx_v_ndim); - - /* "View.MemoryView":1310 - * - * - * if slice_is_contig(src, 'C', ndim): # <<<<<<<<<<<<<< - * direct_copy = slice_is_contig(dst, 'C', ndim) - * elif slice_is_contig(src, 'F', ndim): - */ - goto __pyx_L12; - } - - /* "View.MemoryView":1312 - * if slice_is_contig(src, 'C', ndim): - * direct_copy = slice_is_contig(dst, 'C', ndim) - * elif slice_is_contig(src, 'F', ndim): # <<<<<<<<<<<<<< - * direct_copy = slice_is_contig(dst, 'F', ndim) - * - */ - __pyx_t_2 = __pyx_memviewslice_is_contig(__pyx_v_src, 'F', __pyx_v_ndim); - if (__pyx_t_2) { - - /* "View.MemoryView":1313 - * direct_copy = slice_is_contig(dst, 'C', ndim) - * elif slice_is_contig(src, 'F', ndim): - * direct_copy = slice_is_contig(dst, 'F', ndim) # <<<<<<<<<<<<<< - * - * if direct_copy: - */ - __pyx_v_direct_copy = __pyx_memviewslice_is_contig(__pyx_v_dst, 'F', __pyx_v_ndim); - - /* "View.MemoryView":1312 - * if slice_is_contig(src, 'C', ndim): - * direct_copy = slice_is_contig(dst, 'C', ndim) - * elif slice_is_contig(src, 'F', ndim): # <<<<<<<<<<<<<< - * direct_copy = slice_is_contig(dst, 'F', ndim) - * - */ - } - __pyx_L12:; - - /* "View.MemoryView":1315 - * direct_copy = slice_is_contig(dst, 'F', ndim) - * - * if direct_copy: # <<<<<<<<<<<<<< - * - * refcount_copying(&dst, dtype_is_object, ndim, inc=False) - */ - if (__pyx_v_direct_copy) { - - /* "View.MemoryView":1317 - * if direct_copy: - * - * refcount_copying(&dst, dtype_is_object, ndim, inc=False) # <<<<<<<<<<<<<< - * memcpy(dst.data, src.data, slice_get_size(&src, ndim)) - * refcount_copying(&dst, dtype_is_object, ndim, inc=True) - */ - __pyx_memoryview_refcount_copying((&__pyx_v_dst), __pyx_v_dtype_is_object, __pyx_v_ndim, 0); - - /* "View.MemoryView":1318 - * - * refcount_copying(&dst, dtype_is_object, ndim, inc=False) - * memcpy(dst.data, src.data, slice_get_size(&src, ndim)) # <<<<<<<<<<<<<< - * refcount_copying(&dst, dtype_is_object, ndim, inc=True) - * free(tmpdata) - */ - (void)(memcpy(__pyx_v_dst.data, __pyx_v_src.data, __pyx_memoryview_slice_get_size((&__pyx_v_src), __pyx_v_ndim))); - - /* "View.MemoryView":1319 - * refcount_copying(&dst, dtype_is_object, ndim, inc=False) - * memcpy(dst.data, src.data, slice_get_size(&src, ndim)) - * refcount_copying(&dst, dtype_is_object, ndim, inc=True) # <<<<<<<<<<<<<< - * free(tmpdata) - * return 0 - */ - __pyx_memoryview_refcount_copying((&__pyx_v_dst), __pyx_v_dtype_is_object, __pyx_v_ndim, 1); - - /* "View.MemoryView":1320 - * memcpy(dst.data, src.data, slice_get_size(&src, ndim)) - * refcount_copying(&dst, dtype_is_object, ndim, inc=True) - * free(tmpdata) # <<<<<<<<<<<<<< - * return 0 - * - */ - free(__pyx_v_tmpdata); - - /* "View.MemoryView":1321 - * refcount_copying(&dst, dtype_is_object, ndim, inc=True) - * free(tmpdata) - * return 0 # <<<<<<<<<<<<<< - * - * if order == 'F' == get_best_order(&dst, ndim): - */ - __pyx_r = 0; - goto __pyx_L0; - - /* "View.MemoryView":1315 - * direct_copy = slice_is_contig(dst, 'F', ndim) - * - * if direct_copy: # <<<<<<<<<<<<<< - * - * refcount_copying(&dst, dtype_is_object, ndim, inc=False) - */ - } - - /* "View.MemoryView":1307 - * src = tmp - * - * if not broadcasting: # <<<<<<<<<<<<<< - * - * - */ - } - - /* "View.MemoryView":1323 - * return 0 - * - * if order == 'F' == get_best_order(&dst, ndim): # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_2 = (__pyx_v_order == 'F'); - if (__pyx_t_2) { - __pyx_t_2 = ('F' == __pyx_get_best_slice_order((&__pyx_v_dst), __pyx_v_ndim)); - } - if (__pyx_t_2) { - - /* "View.MemoryView":1326 - * - * - * transpose_memslice(&src) # <<<<<<<<<<<<<< - * transpose_memslice(&dst) - * - */ - __pyx_t_5 = __pyx_memslice_transpose((&__pyx_v_src)); if (unlikely(__pyx_t_5 == ((int)-1))) __PYX_ERR(1, 1326, __pyx_L1_error) - - /* "View.MemoryView":1327 - * - * transpose_memslice(&src) - * transpose_memslice(&dst) # <<<<<<<<<<<<<< - * - * refcount_copying(&dst, dtype_is_object, ndim, inc=False) - */ - __pyx_t_5 = __pyx_memslice_transpose((&__pyx_v_dst)); if (unlikely(__pyx_t_5 == ((int)-1))) __PYX_ERR(1, 1327, __pyx_L1_error) - - /* "View.MemoryView":1323 - * return 0 - * - * if order == 'F' == get_best_order(&dst, ndim): # <<<<<<<<<<<<<< - * - * - */ - } - - /* "View.MemoryView":1329 - * transpose_memslice(&dst) - * - * refcount_copying(&dst, dtype_is_object, ndim, inc=False) # <<<<<<<<<<<<<< - * copy_strided_to_strided(&src, &dst, ndim, itemsize) - * refcount_copying(&dst, dtype_is_object, ndim, inc=True) - */ - __pyx_memoryview_refcount_copying((&__pyx_v_dst), __pyx_v_dtype_is_object, __pyx_v_ndim, 0); - - /* "View.MemoryView":1330 - * - * refcount_copying(&dst, dtype_is_object, ndim, inc=False) - * copy_strided_to_strided(&src, &dst, ndim, itemsize) # <<<<<<<<<<<<<< - * refcount_copying(&dst, dtype_is_object, ndim, inc=True) - * - */ - copy_strided_to_strided((&__pyx_v_src), (&__pyx_v_dst), __pyx_v_ndim, __pyx_v_itemsize); - - /* "View.MemoryView":1331 - * refcount_copying(&dst, dtype_is_object, ndim, inc=False) - * copy_strided_to_strided(&src, &dst, ndim, itemsize) - * refcount_copying(&dst, dtype_is_object, ndim, inc=True) # <<<<<<<<<<<<<< - * - * free(tmpdata) - */ - __pyx_memoryview_refcount_copying((&__pyx_v_dst), __pyx_v_dtype_is_object, __pyx_v_ndim, 1); - - /* "View.MemoryView":1333 - * refcount_copying(&dst, dtype_is_object, ndim, inc=True) - * - * free(tmpdata) # <<<<<<<<<<<<<< - * return 0 - * - */ - free(__pyx_v_tmpdata); - - /* "View.MemoryView":1334 - * - * free(tmpdata) - * return 0 # <<<<<<<<<<<<<< - * - * @cname('__pyx_memoryview_broadcast_leading') - */ - __pyx_r = 0; - goto __pyx_L0; - - /* "View.MemoryView":1265 - * - * @cname('__pyx_memoryview_copy_contents') - * cdef int memoryview_copy_contents(__Pyx_memviewslice src, # <<<<<<<<<<<<<< - * __Pyx_memviewslice dst, - * int src_ndim, int dst_ndim, - */ - - /* function exit code */ - __pyx_L1_error:; - #ifdef WITH_THREAD - __pyx_gilstate_save = __Pyx_PyGILState_Ensure(); - #endif - __Pyx_AddTraceback("View.MemoryView.memoryview_copy_contents", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - #ifdef WITH_THREAD - __Pyx_PyGILState_Release(__pyx_gilstate_save); - #endif - __pyx_L0:; - __Pyx_RefNannyFinishContextNogil() - return __pyx_r; -} - -/* "View.MemoryView":1337 - * - * @cname('__pyx_memoryview_broadcast_leading') - * cdef void broadcast_leading(__Pyx_memviewslice *mslice, # <<<<<<<<<<<<<< - * int ndim, - * int ndim_other) noexcept nogil: - */ - -static void __pyx_memoryview_broadcast_leading(__Pyx_memviewslice *__pyx_v_mslice, int __pyx_v_ndim, int __pyx_v_ndim_other) { - int __pyx_v_i; - int __pyx_v_offset; - int __pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - - /* "View.MemoryView":1341 - * int ndim_other) noexcept nogil: - * cdef int i - * cdef int offset = ndim_other - ndim # <<<<<<<<<<<<<< - * - * for i in range(ndim - 1, -1, -1): - */ - __pyx_v_offset = (__pyx_v_ndim_other - __pyx_v_ndim); - - /* "View.MemoryView":1343 - * cdef int offset = ndim_other - ndim - * - * for i in range(ndim - 1, -1, -1): # <<<<<<<<<<<<<< - * mslice.shape[i + offset] = mslice.shape[i] - * mslice.strides[i + offset] = mslice.strides[i] - */ - for (__pyx_t_1 = (__pyx_v_ndim - 1); __pyx_t_1 > -1; __pyx_t_1-=1) { - __pyx_v_i = __pyx_t_1; - - /* "View.MemoryView":1344 - * - * for i in range(ndim - 1, -1, -1): - * mslice.shape[i + offset] = mslice.shape[i] # <<<<<<<<<<<<<< - * mslice.strides[i + offset] = mslice.strides[i] - * mslice.suboffsets[i + offset] = mslice.suboffsets[i] - */ - (__pyx_v_mslice->shape[(__pyx_v_i + __pyx_v_offset)]) = (__pyx_v_mslice->shape[__pyx_v_i]); - - /* "View.MemoryView":1345 - * for i in range(ndim - 1, -1, -1): - * mslice.shape[i + offset] = mslice.shape[i] - * mslice.strides[i + offset] = mslice.strides[i] # <<<<<<<<<<<<<< - * mslice.suboffsets[i + offset] = mslice.suboffsets[i] - * - */ - (__pyx_v_mslice->strides[(__pyx_v_i + __pyx_v_offset)]) = (__pyx_v_mslice->strides[__pyx_v_i]); - - /* "View.MemoryView":1346 - * mslice.shape[i + offset] = mslice.shape[i] - * mslice.strides[i + offset] = mslice.strides[i] - * mslice.suboffsets[i + offset] = mslice.suboffsets[i] # <<<<<<<<<<<<<< - * - * for i in range(offset): - */ - (__pyx_v_mslice->suboffsets[(__pyx_v_i + __pyx_v_offset)]) = (__pyx_v_mslice->suboffsets[__pyx_v_i]); - } - - /* "View.MemoryView":1348 - * mslice.suboffsets[i + offset] = mslice.suboffsets[i] - * - * for i in range(offset): # <<<<<<<<<<<<<< - * mslice.shape[i] = 1 - * mslice.strides[i] = mslice.strides[0] - */ - __pyx_t_1 = __pyx_v_offset; - __pyx_t_2 = __pyx_t_1; - for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { - __pyx_v_i = __pyx_t_3; - - /* "View.MemoryView":1349 - * - * for i in range(offset): - * mslice.shape[i] = 1 # <<<<<<<<<<<<<< - * mslice.strides[i] = mslice.strides[0] - * mslice.suboffsets[i] = -1 - */ - (__pyx_v_mslice->shape[__pyx_v_i]) = 1; - - /* "View.MemoryView":1350 - * for i in range(offset): - * mslice.shape[i] = 1 - * mslice.strides[i] = mslice.strides[0] # <<<<<<<<<<<<<< - * mslice.suboffsets[i] = -1 - * - */ - (__pyx_v_mslice->strides[__pyx_v_i]) = (__pyx_v_mslice->strides[0]); - - /* "View.MemoryView":1351 - * mslice.shape[i] = 1 - * mslice.strides[i] = mslice.strides[0] - * mslice.suboffsets[i] = -1 # <<<<<<<<<<<<<< - * - * - */ - (__pyx_v_mslice->suboffsets[__pyx_v_i]) = -1L; - } - - /* "View.MemoryView":1337 - * - * @cname('__pyx_memoryview_broadcast_leading') - * cdef void broadcast_leading(__Pyx_memviewslice *mslice, # <<<<<<<<<<<<<< - * int ndim, - * int ndim_other) noexcept nogil: - */ - - /* function exit code */ -} - -/* "View.MemoryView":1359 - * - * @cname('__pyx_memoryview_refcount_copying') - * cdef void refcount_copying(__Pyx_memviewslice *dst, bint dtype_is_object, int ndim, bint inc) noexcept nogil: # <<<<<<<<<<<<<< - * - * if dtype_is_object: - */ - -static void __pyx_memoryview_refcount_copying(__Pyx_memviewslice *__pyx_v_dst, int __pyx_v_dtype_is_object, int __pyx_v_ndim, int __pyx_v_inc) { - - /* "View.MemoryView":1361 - * cdef void refcount_copying(__Pyx_memviewslice *dst, bint dtype_is_object, int ndim, bint inc) noexcept nogil: - * - * if dtype_is_object: # <<<<<<<<<<<<<< - * refcount_objects_in_slice_with_gil(dst.data, dst.shape, dst.strides, ndim, inc) - * - */ - if (__pyx_v_dtype_is_object) { - - /* "View.MemoryView":1362 - * - * if dtype_is_object: - * refcount_objects_in_slice_with_gil(dst.data, dst.shape, dst.strides, ndim, inc) # <<<<<<<<<<<<<< - * - * @cname('__pyx_memoryview_refcount_objects_in_slice_with_gil') - */ - __pyx_memoryview_refcount_objects_in_slice_with_gil(__pyx_v_dst->data, __pyx_v_dst->shape, __pyx_v_dst->strides, __pyx_v_ndim, __pyx_v_inc); - - /* "View.MemoryView":1361 - * cdef void refcount_copying(__Pyx_memviewslice *dst, bint dtype_is_object, int ndim, bint inc) noexcept nogil: - * - * if dtype_is_object: # <<<<<<<<<<<<<< - * refcount_objects_in_slice_with_gil(dst.data, dst.shape, dst.strides, ndim, inc) - * - */ - } - - /* "View.MemoryView":1359 - * - * @cname('__pyx_memoryview_refcount_copying') - * cdef void refcount_copying(__Pyx_memviewslice *dst, bint dtype_is_object, int ndim, bint inc) noexcept nogil: # <<<<<<<<<<<<<< - * - * if dtype_is_object: - */ - - /* function exit code */ -} - -/* "View.MemoryView":1365 - * - * @cname('__pyx_memoryview_refcount_objects_in_slice_with_gil') - * cdef void refcount_objects_in_slice_with_gil(char *data, Py_ssize_t *shape, # <<<<<<<<<<<<<< - * Py_ssize_t *strides, int ndim, - * bint inc) noexcept with gil: - */ - -static void __pyx_memoryview_refcount_objects_in_slice_with_gil(char *__pyx_v_data, Py_ssize_t *__pyx_v_shape, Py_ssize_t *__pyx_v_strides, int __pyx_v_ndim, int __pyx_v_inc) { - __Pyx_RefNannyDeclarations - #ifdef WITH_THREAD - PyGILState_STATE __pyx_gilstate_save = __Pyx_PyGILState_Ensure(); - #endif - __Pyx_RefNannySetupContext("refcount_objects_in_slice_with_gil", 0); - - /* "View.MemoryView":1368 - * Py_ssize_t *strides, int ndim, - * bint inc) noexcept with gil: - * refcount_objects_in_slice(data, shape, strides, ndim, inc) # <<<<<<<<<<<<<< - * - * @cname('__pyx_memoryview_refcount_objects_in_slice') - */ - __pyx_memoryview_refcount_objects_in_slice(__pyx_v_data, __pyx_v_shape, __pyx_v_strides, __pyx_v_ndim, __pyx_v_inc); - - /* "View.MemoryView":1365 - * - * @cname('__pyx_memoryview_refcount_objects_in_slice_with_gil') - * cdef void refcount_objects_in_slice_with_gil(char *data, Py_ssize_t *shape, # <<<<<<<<<<<<<< - * Py_ssize_t *strides, int ndim, - * bint inc) noexcept with gil: - */ - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - #ifdef WITH_THREAD - __Pyx_PyGILState_Release(__pyx_gilstate_save); - #endif -} - -/* "View.MemoryView":1371 - * - * @cname('__pyx_memoryview_refcount_objects_in_slice') - * cdef void refcount_objects_in_slice(char *data, Py_ssize_t *shape, # <<<<<<<<<<<<<< - * Py_ssize_t *strides, int ndim, bint inc) noexcept: - * cdef Py_ssize_t i - */ - -static void __pyx_memoryview_refcount_objects_in_slice(char *__pyx_v_data, Py_ssize_t *__pyx_v_shape, Py_ssize_t *__pyx_v_strides, int __pyx_v_ndim, int __pyx_v_inc) { - CYTHON_UNUSED Py_ssize_t __pyx_v_i; - Py_ssize_t __pyx_v_stride; - __Pyx_RefNannyDeclarations - Py_ssize_t __pyx_t_1; - Py_ssize_t __pyx_t_2; - Py_ssize_t __pyx_t_3; - int __pyx_t_4; - __Pyx_RefNannySetupContext("refcount_objects_in_slice", 0); - - /* "View.MemoryView":1374 - * Py_ssize_t *strides, int ndim, bint inc) noexcept: - * cdef Py_ssize_t i - * cdef Py_ssize_t stride = strides[0] # <<<<<<<<<<<<<< - * - * for i in range(shape[0]): - */ - __pyx_v_stride = (__pyx_v_strides[0]); - - /* "View.MemoryView":1376 - * cdef Py_ssize_t stride = strides[0] - * - * for i in range(shape[0]): # <<<<<<<<<<<<<< - * if ndim == 1: - * if inc: - */ - __pyx_t_1 = (__pyx_v_shape[0]); - __pyx_t_2 = __pyx_t_1; - for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { - __pyx_v_i = __pyx_t_3; - - /* "View.MemoryView":1377 - * - * for i in range(shape[0]): - * if ndim == 1: # <<<<<<<<<<<<<< - * if inc: - * Py_INCREF(( data)[0]) - */ - __pyx_t_4 = (__pyx_v_ndim == 1); - if (__pyx_t_4) { - - /* "View.MemoryView":1378 - * for i in range(shape[0]): - * if ndim == 1: - * if inc: # <<<<<<<<<<<<<< - * Py_INCREF(( data)[0]) - * else: - */ - if (__pyx_v_inc) { - - /* "View.MemoryView":1379 - * if ndim == 1: - * if inc: - * Py_INCREF(( data)[0]) # <<<<<<<<<<<<<< - * else: - * Py_DECREF(( data)[0]) - */ - Py_INCREF((((PyObject **)__pyx_v_data)[0])); - - /* "View.MemoryView":1378 - * for i in range(shape[0]): - * if ndim == 1: - * if inc: # <<<<<<<<<<<<<< - * Py_INCREF(( data)[0]) - * else: - */ - goto __pyx_L6; - } - - /* "View.MemoryView":1381 - * Py_INCREF(( data)[0]) - * else: - * Py_DECREF(( data)[0]) # <<<<<<<<<<<<<< - * else: - * refcount_objects_in_slice(data, shape + 1, strides + 1, ndim - 1, inc) - */ - /*else*/ { - Py_DECREF((((PyObject **)__pyx_v_data)[0])); - } - __pyx_L6:; - - /* "View.MemoryView":1377 - * - * for i in range(shape[0]): - * if ndim == 1: # <<<<<<<<<<<<<< - * if inc: - * Py_INCREF(( data)[0]) - */ - goto __pyx_L5; - } - - /* "View.MemoryView":1383 - * Py_DECREF(( data)[0]) - * else: - * refcount_objects_in_slice(data, shape + 1, strides + 1, ndim - 1, inc) # <<<<<<<<<<<<<< - * - * data += stride - */ - /*else*/ { - __pyx_memoryview_refcount_objects_in_slice(__pyx_v_data, (__pyx_v_shape + 1), (__pyx_v_strides + 1), (__pyx_v_ndim - 1), __pyx_v_inc); - } - __pyx_L5:; - - /* "View.MemoryView":1385 - * refcount_objects_in_slice(data, shape + 1, strides + 1, ndim - 1, inc) - * - * data += stride # <<<<<<<<<<<<<< - * - * - */ - __pyx_v_data = (__pyx_v_data + __pyx_v_stride); - } - - /* "View.MemoryView":1371 - * - * @cname('__pyx_memoryview_refcount_objects_in_slice') - * cdef void refcount_objects_in_slice(char *data, Py_ssize_t *shape, # <<<<<<<<<<<<<< - * Py_ssize_t *strides, int ndim, bint inc) noexcept: - * cdef Py_ssize_t i - */ - - /* function exit code */ - __Pyx_RefNannyFinishContext(); -} - -/* "View.MemoryView":1391 - * - * @cname('__pyx_memoryview_slice_assign_scalar') - * cdef void slice_assign_scalar(__Pyx_memviewslice *dst, int ndim, # <<<<<<<<<<<<<< - * size_t itemsize, void *item, - * bint dtype_is_object) noexcept nogil: - */ - -static void __pyx_memoryview_slice_assign_scalar(__Pyx_memviewslice *__pyx_v_dst, int __pyx_v_ndim, size_t __pyx_v_itemsize, void *__pyx_v_item, int __pyx_v_dtype_is_object) { - - /* "View.MemoryView":1394 - * size_t itemsize, void *item, - * bint dtype_is_object) noexcept nogil: - * refcount_copying(dst, dtype_is_object, ndim, inc=False) # <<<<<<<<<<<<<< - * _slice_assign_scalar(dst.data, dst.shape, dst.strides, ndim, itemsize, item) - * refcount_copying(dst, dtype_is_object, ndim, inc=True) - */ - __pyx_memoryview_refcount_copying(__pyx_v_dst, __pyx_v_dtype_is_object, __pyx_v_ndim, 0); - - /* "View.MemoryView":1395 - * bint dtype_is_object) noexcept nogil: - * refcount_copying(dst, dtype_is_object, ndim, inc=False) - * _slice_assign_scalar(dst.data, dst.shape, dst.strides, ndim, itemsize, item) # <<<<<<<<<<<<<< - * refcount_copying(dst, dtype_is_object, ndim, inc=True) - * - */ - __pyx_memoryview__slice_assign_scalar(__pyx_v_dst->data, __pyx_v_dst->shape, __pyx_v_dst->strides, __pyx_v_ndim, __pyx_v_itemsize, __pyx_v_item); - - /* "View.MemoryView":1396 - * refcount_copying(dst, dtype_is_object, ndim, inc=False) - * _slice_assign_scalar(dst.data, dst.shape, dst.strides, ndim, itemsize, item) - * refcount_copying(dst, dtype_is_object, ndim, inc=True) # <<<<<<<<<<<<<< - * - * - */ - __pyx_memoryview_refcount_copying(__pyx_v_dst, __pyx_v_dtype_is_object, __pyx_v_ndim, 1); - - /* "View.MemoryView":1391 - * - * @cname('__pyx_memoryview_slice_assign_scalar') - * cdef void slice_assign_scalar(__Pyx_memviewslice *dst, int ndim, # <<<<<<<<<<<<<< - * size_t itemsize, void *item, - * bint dtype_is_object) noexcept nogil: - */ - - /* function exit code */ -} - -/* "View.MemoryView":1400 - * - * @cname('__pyx_memoryview__slice_assign_scalar') - * cdef void _slice_assign_scalar(char *data, Py_ssize_t *shape, # <<<<<<<<<<<<<< - * Py_ssize_t *strides, int ndim, - * size_t itemsize, void *item) noexcept nogil: - */ - -static void __pyx_memoryview__slice_assign_scalar(char *__pyx_v_data, Py_ssize_t *__pyx_v_shape, Py_ssize_t *__pyx_v_strides, int __pyx_v_ndim, size_t __pyx_v_itemsize, void *__pyx_v_item) { - CYTHON_UNUSED Py_ssize_t __pyx_v_i; - Py_ssize_t __pyx_v_stride; - Py_ssize_t __pyx_v_extent; - int __pyx_t_1; - Py_ssize_t __pyx_t_2; - Py_ssize_t __pyx_t_3; - Py_ssize_t __pyx_t_4; - - /* "View.MemoryView":1404 - * size_t itemsize, void *item) noexcept nogil: - * cdef Py_ssize_t i - * cdef Py_ssize_t stride = strides[0] # <<<<<<<<<<<<<< - * cdef Py_ssize_t extent = shape[0] - * - */ - __pyx_v_stride = (__pyx_v_strides[0]); - - /* "View.MemoryView":1405 - * cdef Py_ssize_t i - * cdef Py_ssize_t stride = strides[0] - * cdef Py_ssize_t extent = shape[0] # <<<<<<<<<<<<<< - * - * if ndim == 1: - */ - __pyx_v_extent = (__pyx_v_shape[0]); - - /* "View.MemoryView":1407 - * cdef Py_ssize_t extent = shape[0] - * - * if ndim == 1: # <<<<<<<<<<<<<< - * for i in range(extent): - * memcpy(data, item, itemsize) - */ - __pyx_t_1 = (__pyx_v_ndim == 1); - if (__pyx_t_1) { - - /* "View.MemoryView":1408 - * - * if ndim == 1: - * for i in range(extent): # <<<<<<<<<<<<<< - * memcpy(data, item, itemsize) - * data += stride - */ - __pyx_t_2 = __pyx_v_extent; - __pyx_t_3 = __pyx_t_2; - for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { - __pyx_v_i = __pyx_t_4; - - /* "View.MemoryView":1409 - * if ndim == 1: - * for i in range(extent): - * memcpy(data, item, itemsize) # <<<<<<<<<<<<<< - * data += stride - * else: - */ - (void)(memcpy(__pyx_v_data, __pyx_v_item, __pyx_v_itemsize)); - - /* "View.MemoryView":1410 - * for i in range(extent): - * memcpy(data, item, itemsize) - * data += stride # <<<<<<<<<<<<<< - * else: - * for i in range(extent): - */ - __pyx_v_data = (__pyx_v_data + __pyx_v_stride); - } - - /* "View.MemoryView":1407 - * cdef Py_ssize_t extent = shape[0] - * - * if ndim == 1: # <<<<<<<<<<<<<< - * for i in range(extent): - * memcpy(data, item, itemsize) - */ - goto __pyx_L3; - } - - /* "View.MemoryView":1412 - * data += stride - * else: - * for i in range(extent): # <<<<<<<<<<<<<< - * _slice_assign_scalar(data, shape + 1, strides + 1, ndim - 1, itemsize, item) - * data += stride - */ - /*else*/ { - __pyx_t_2 = __pyx_v_extent; - __pyx_t_3 = __pyx_t_2; - for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { - __pyx_v_i = __pyx_t_4; - - /* "View.MemoryView":1413 - * else: - * for i in range(extent): - * _slice_assign_scalar(data, shape + 1, strides + 1, ndim - 1, itemsize, item) # <<<<<<<<<<<<<< - * data += stride - * - */ - __pyx_memoryview__slice_assign_scalar(__pyx_v_data, (__pyx_v_shape + 1), (__pyx_v_strides + 1), (__pyx_v_ndim - 1), __pyx_v_itemsize, __pyx_v_item); - - /* "View.MemoryView":1414 - * for i in range(extent): - * _slice_assign_scalar(data, shape + 1, strides + 1, ndim - 1, itemsize, item) - * data += stride # <<<<<<<<<<<<<< - * - * - */ - __pyx_v_data = (__pyx_v_data + __pyx_v_stride); - } - } - __pyx_L3:; - - /* "View.MemoryView":1400 - * - * @cname('__pyx_memoryview__slice_assign_scalar') - * cdef void _slice_assign_scalar(char *data, Py_ssize_t *shape, # <<<<<<<<<<<<<< - * Py_ssize_t *strides, int ndim, - * size_t itemsize, void *item) noexcept nogil: - */ - - /* function exit code */ -} - -/* "(tree fragment)":1 - * def __pyx_unpickle_Enum(__pyx_type, long __pyx_checksum, __pyx_state): # <<<<<<<<<<<<<< - * cdef object __pyx_PickleError - * cdef object __pyx_result - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_15View_dot_MemoryView_1__pyx_unpickle_Enum(PyObject *__pyx_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -static PyMethodDef __pyx_mdef_15View_dot_MemoryView_1__pyx_unpickle_Enum = {"__pyx_unpickle_Enum", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_15View_dot_MemoryView_1__pyx_unpickle_Enum, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; -static PyObject *__pyx_pw_15View_dot_MemoryView_1__pyx_unpickle_Enum(PyObject *__pyx_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - PyObject *__pyx_v___pyx_type = 0; - long __pyx_v___pyx_checksum; - PyObject *__pyx_v___pyx_state = 0; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[3] = {0,0,0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__pyx_unpickle_Enum (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 1, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_pyx_type,&__pyx_n_s_pyx_checksum,&__pyx_n_s_pyx_state,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 3: values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); - CYTHON_FALLTHROUGH; - case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); - CYTHON_FALLTHROUGH; - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pyx_type)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 1, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - CYTHON_FALLTHROUGH; - case 1: - if (likely((values[1] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pyx_checksum)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[1]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 1, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("__pyx_unpickle_Enum", 1, 3, 3, 1); __PYX_ERR(1, 1, __pyx_L3_error) - } - CYTHON_FALLTHROUGH; - case 2: - if (likely((values[2] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pyx_state)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[2]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 1, __pyx_L3_error) - else { - __Pyx_RaiseArgtupleInvalid("__pyx_unpickle_Enum", 1, 3, 3, 2); __PYX_ERR(1, 1, __pyx_L3_error) - } - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__pyx_unpickle_Enum") < 0)) __PYX_ERR(1, 1, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 3)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); - values[2] = __Pyx_Arg_FASTCALL(__pyx_args, 2); - } - __pyx_v___pyx_type = values[0]; - __pyx_v___pyx_checksum = __Pyx_PyInt_As_long(values[1]); if (unlikely((__pyx_v___pyx_checksum == (long)-1) && PyErr_Occurred())) __PYX_ERR(1, 1, __pyx_L3_error) - __pyx_v___pyx_state = values[2]; - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__pyx_unpickle_Enum", 1, 3, 3, __pyx_nargs); __PYX_ERR(1, 1, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("View.MemoryView.__pyx_unpickle_Enum", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_15View_dot_MemoryView___pyx_unpickle_Enum(__pyx_self, __pyx_v___pyx_type, __pyx_v___pyx_checksum, __pyx_v___pyx_state); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_15View_dot_MemoryView___pyx_unpickle_Enum(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v___pyx_type, long __pyx_v___pyx_checksum, PyObject *__pyx_v___pyx_state) { - PyObject *__pyx_v___pyx_PickleError = 0; - PyObject *__pyx_v___pyx_result = 0; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - int __pyx_t_5; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__pyx_unpickle_Enum", 0); - - /* "(tree fragment)":4 - * cdef object __pyx_PickleError - * cdef object __pyx_result - * if __pyx_checksum not in (0x82a3537, 0x6ae9995, 0xb068931): # <<<<<<<<<<<<<< - * from pickle import PickleError as __pyx_PickleError - * raise __pyx_PickleError, "Incompatible checksums (0x%x vs (0x82a3537, 0x6ae9995, 0xb068931) = (name))" % __pyx_checksum - */ - __pyx_t_1 = __Pyx_PyInt_From_long(__pyx_v___pyx_checksum); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 4, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = (__Pyx_PySequence_ContainsTF(__pyx_t_1, __pyx_tuple__8, Py_NE)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(1, 4, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (__pyx_t_2) { - - /* "(tree fragment)":5 - * cdef object __pyx_result - * if __pyx_checksum not in (0x82a3537, 0x6ae9995, 0xb068931): - * from pickle import PickleError as __pyx_PickleError # <<<<<<<<<<<<<< - * raise __pyx_PickleError, "Incompatible checksums (0x%x vs (0x82a3537, 0x6ae9995, 0xb068931) = (name))" % __pyx_checksum - * __pyx_result = Enum.__new__(__pyx_type) - */ - __pyx_t_1 = PyList_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 5, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(__pyx_n_s_PickleError); - __Pyx_GIVEREF(__pyx_n_s_PickleError); - if (__Pyx_PyList_SET_ITEM(__pyx_t_1, 0, __pyx_n_s_PickleError)) __PYX_ERR(1, 5, __pyx_L1_error); - __pyx_t_3 = __Pyx_Import(__pyx_n_s_pickle, __pyx_t_1, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 5, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_PickleError); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 5, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_INCREF(__pyx_t_1); - __pyx_v___pyx_PickleError = __pyx_t_1; - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - - /* "(tree fragment)":6 - * if __pyx_checksum not in (0x82a3537, 0x6ae9995, 0xb068931): - * from pickle import PickleError as __pyx_PickleError - * raise __pyx_PickleError, "Incompatible checksums (0x%x vs (0x82a3537, 0x6ae9995, 0xb068931) = (name))" % __pyx_checksum # <<<<<<<<<<<<<< - * __pyx_result = Enum.__new__(__pyx_type) - * if __pyx_state is not None: - */ - __pyx_t_3 = __Pyx_PyInt_From_long(__pyx_v___pyx_checksum); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 6, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_1 = __Pyx_PyString_Format(__pyx_kp_s_Incompatible_checksums_0x_x_vs_0, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 6, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_Raise(__pyx_v___pyx_PickleError, __pyx_t_1, 0, 0); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __PYX_ERR(1, 6, __pyx_L1_error) - - /* "(tree fragment)":4 - * cdef object __pyx_PickleError - * cdef object __pyx_result - * if __pyx_checksum not in (0x82a3537, 0x6ae9995, 0xb068931): # <<<<<<<<<<<<<< - * from pickle import PickleError as __pyx_PickleError - * raise __pyx_PickleError, "Incompatible checksums (0x%x vs (0x82a3537, 0x6ae9995, 0xb068931) = (name))" % __pyx_checksum - */ - } - - /* "(tree fragment)":7 - * from pickle import PickleError as __pyx_PickleError - * raise __pyx_PickleError, "Incompatible checksums (0x%x vs (0x82a3537, 0x6ae9995, 0xb068931) = (name))" % __pyx_checksum - * __pyx_result = Enum.__new__(__pyx_type) # <<<<<<<<<<<<<< - * if __pyx_state is not None: - * __pyx_unpickle_Enum__set_state( __pyx_result, __pyx_state) - */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_MemviewEnum_type), __pyx_n_s_new); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 7, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = NULL; - __pyx_t_5 = 0; - #if CYTHON_UNPACK_METHODS - if (likely(PyMethod_Check(__pyx_t_3))) { - __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_3); - if (likely(__pyx_t_4)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3); - __Pyx_INCREF(__pyx_t_4); - __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_3, function); - __pyx_t_5 = 1; - } - } - #endif - { - PyObject *__pyx_callargs[2] = {__pyx_t_4, __pyx_v___pyx_type}; - __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_3, __pyx_callargs+1-__pyx_t_5, 1+__pyx_t_5); - __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 7, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - } - __pyx_v___pyx_result = __pyx_t_1; - __pyx_t_1 = 0; - - /* "(tree fragment)":8 - * raise __pyx_PickleError, "Incompatible checksums (0x%x vs (0x82a3537, 0x6ae9995, 0xb068931) = (name))" % __pyx_checksum - * __pyx_result = Enum.__new__(__pyx_type) - * if __pyx_state is not None: # <<<<<<<<<<<<<< - * __pyx_unpickle_Enum__set_state( __pyx_result, __pyx_state) - * return __pyx_result - */ - __pyx_t_2 = (__pyx_v___pyx_state != Py_None); - if (__pyx_t_2) { - - /* "(tree fragment)":9 - * __pyx_result = Enum.__new__(__pyx_type) - * if __pyx_state is not None: - * __pyx_unpickle_Enum__set_state( __pyx_result, __pyx_state) # <<<<<<<<<<<<<< - * return __pyx_result - * cdef __pyx_unpickle_Enum__set_state(Enum __pyx_result, tuple __pyx_state): - */ - if (!(likely(PyTuple_CheckExact(__pyx_v___pyx_state))||((__pyx_v___pyx_state) == Py_None) || __Pyx_RaiseUnexpectedTypeError("tuple", __pyx_v___pyx_state))) __PYX_ERR(1, 9, __pyx_L1_error) - __pyx_t_1 = __pyx_unpickle_Enum__set_state(((struct __pyx_MemviewEnum_obj *)__pyx_v___pyx_result), ((PyObject*)__pyx_v___pyx_state)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 9, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "(tree fragment)":8 - * raise __pyx_PickleError, "Incompatible checksums (0x%x vs (0x82a3537, 0x6ae9995, 0xb068931) = (name))" % __pyx_checksum - * __pyx_result = Enum.__new__(__pyx_type) - * if __pyx_state is not None: # <<<<<<<<<<<<<< - * __pyx_unpickle_Enum__set_state( __pyx_result, __pyx_state) - * return __pyx_result - */ - } - - /* "(tree fragment)":10 - * if __pyx_state is not None: - * __pyx_unpickle_Enum__set_state( __pyx_result, __pyx_state) - * return __pyx_result # <<<<<<<<<<<<<< - * cdef __pyx_unpickle_Enum__set_state(Enum __pyx_result, tuple __pyx_state): - * __pyx_result.name = __pyx_state[0] - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v___pyx_result); - __pyx_r = __pyx_v___pyx_result; - goto __pyx_L0; - - /* "(tree fragment)":1 - * def __pyx_unpickle_Enum(__pyx_type, long __pyx_checksum, __pyx_state): # <<<<<<<<<<<<<< - * cdef object __pyx_PickleError - * cdef object __pyx_result - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_AddTraceback("View.MemoryView.__pyx_unpickle_Enum", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v___pyx_PickleError); - __Pyx_XDECREF(__pyx_v___pyx_result); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":11 - * __pyx_unpickle_Enum__set_state( __pyx_result, __pyx_state) - * return __pyx_result - * cdef __pyx_unpickle_Enum__set_state(Enum __pyx_result, tuple __pyx_state): # <<<<<<<<<<<<<< - * __pyx_result.name = __pyx_state[0] - * if len(__pyx_state) > 1 and hasattr(__pyx_result, '__dict__'): - */ - -static PyObject *__pyx_unpickle_Enum__set_state(struct __pyx_MemviewEnum_obj *__pyx_v___pyx_result, PyObject *__pyx_v___pyx_state) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_t_2; - Py_ssize_t __pyx_t_3; - int __pyx_t_4; - PyObject *__pyx_t_5 = NULL; - PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - int __pyx_t_8; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__pyx_unpickle_Enum__set_state", 0); - - /* "(tree fragment)":12 - * return __pyx_result - * cdef __pyx_unpickle_Enum__set_state(Enum __pyx_result, tuple __pyx_state): - * __pyx_result.name = __pyx_state[0] # <<<<<<<<<<<<<< - * if len(__pyx_state) > 1 and hasattr(__pyx_result, '__dict__'): - * __pyx_result.__dict__.update(__pyx_state[1]) - */ - if (unlikely(__pyx_v___pyx_state == Py_None)) { - PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); - __PYX_ERR(1, 12, __pyx_L1_error) - } - __pyx_t_1 = __Pyx_GetItemInt_Tuple(__pyx_v___pyx_state, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 12, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - __Pyx_GOTREF(__pyx_v___pyx_result->name); - __Pyx_DECREF(__pyx_v___pyx_result->name); - __pyx_v___pyx_result->name = __pyx_t_1; - __pyx_t_1 = 0; - - /* "(tree fragment)":13 - * cdef __pyx_unpickle_Enum__set_state(Enum __pyx_result, tuple __pyx_state): - * __pyx_result.name = __pyx_state[0] - * if len(__pyx_state) > 1 and hasattr(__pyx_result, '__dict__'): # <<<<<<<<<<<<<< - * __pyx_result.__dict__.update(__pyx_state[1]) - */ - if (unlikely(__pyx_v___pyx_state == Py_None)) { - PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); - __PYX_ERR(1, 13, __pyx_L1_error) - } - __pyx_t_3 = __Pyx_PyTuple_GET_SIZE(__pyx_v___pyx_state); if (unlikely(__pyx_t_3 == ((Py_ssize_t)-1))) __PYX_ERR(1, 13, __pyx_L1_error) - __pyx_t_4 = (__pyx_t_3 > 1); - if (__pyx_t_4) { - } else { - __pyx_t_2 = __pyx_t_4; - goto __pyx_L4_bool_binop_done; - } - __pyx_t_4 = __Pyx_HasAttr(((PyObject *)__pyx_v___pyx_result), __pyx_n_s_dict); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 13, __pyx_L1_error) - __pyx_t_2 = __pyx_t_4; - __pyx_L4_bool_binop_done:; - if (__pyx_t_2) { - - /* "(tree fragment)":14 - * __pyx_result.name = __pyx_state[0] - * if len(__pyx_state) > 1 and hasattr(__pyx_result, '__dict__'): - * __pyx_result.__dict__.update(__pyx_state[1]) # <<<<<<<<<<<<<< - */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v___pyx_result), __pyx_n_s_dict); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 14, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_update); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 14, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (unlikely(__pyx_v___pyx_state == Py_None)) { - PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); - __PYX_ERR(1, 14, __pyx_L1_error) - } - __pyx_t_5 = __Pyx_GetItemInt_Tuple(__pyx_v___pyx_state, 1, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 14, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_7 = NULL; - __pyx_t_8 = 0; - #if CYTHON_UNPACK_METHODS - if (likely(PyMethod_Check(__pyx_t_6))) { - __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_6); - if (likely(__pyx_t_7)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); - __Pyx_INCREF(__pyx_t_7); - __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_6, function); - __pyx_t_8 = 1; - } - } - #endif - { - PyObject *__pyx_callargs[2] = {__pyx_t_7, __pyx_t_5}; - __pyx_t_1 = __Pyx_PyObject_FastCall(__pyx_t_6, __pyx_callargs+1-__pyx_t_8, 1+__pyx_t_8); - __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 14, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - } - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "(tree fragment)":13 - * cdef __pyx_unpickle_Enum__set_state(Enum __pyx_result, tuple __pyx_state): - * __pyx_result.name = __pyx_state[0] - * if len(__pyx_state) > 1 and hasattr(__pyx_result, '__dict__'): # <<<<<<<<<<<<<< - * __pyx_result.__dict__.update(__pyx_state[1]) - */ - } - - /* "(tree fragment)":11 - * __pyx_unpickle_Enum__set_state( __pyx_result, __pyx_state) - * return __pyx_result - * cdef __pyx_unpickle_Enum__set_state(Enum __pyx_result, tuple __pyx_state): # <<<<<<<<<<<<<< - * __pyx_result.name = __pyx_state[0] - * if len(__pyx_state) > 1 and hasattr(__pyx_result, '__dict__'): - */ - - /* function exit code */ - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_5); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_AddTraceback("View.MemoryView.__pyx_unpickle_Enum__set_state", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":25 - * - * - * cdef bytes str_as_bytes(s): # <<<<<<<<<<<<<< - * if isinstance(s, unicode): - * return (s).encode('utf-8') - */ - -static PyObject *__pyx_f_9csimdjson_str_as_bytes(PyObject *__pyx_v_s) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("str_as_bytes", 0); - - /* "csimdjson.pyx":26 - * - * cdef bytes str_as_bytes(s): - * if isinstance(s, unicode): # <<<<<<<<<<<<<< - * return (s).encode('utf-8') - * return s - */ - __pyx_t_1 = PyUnicode_Check(__pyx_v_s); - if (__pyx_t_1) { - - /* "csimdjson.pyx":27 - * cdef bytes str_as_bytes(s): - * if isinstance(s, unicode): - * return (s).encode('utf-8') # <<<<<<<<<<<<<< - * return s - * - */ - __Pyx_XDECREF(__pyx_r); - if (unlikely(__pyx_v_s == Py_None)) { - PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "encode"); - __PYX_ERR(0, 27, __pyx_L1_error) - } - __pyx_t_2 = PyUnicode_AsUTF8String(((PyObject*)__pyx_v_s)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 27, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_r = ((PyObject*)__pyx_t_2); - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":26 - * - * cdef bytes str_as_bytes(s): - * if isinstance(s, unicode): # <<<<<<<<<<<<<< - * return (s).encode('utf-8') - * return s - */ - } - - /* "csimdjson.pyx":28 - * if isinstance(s, unicode): - * return (s).encode('utf-8') - * return s # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - if (!(likely(PyBytes_CheckExact(__pyx_v_s))||((__pyx_v_s) == Py_None) || __Pyx_RaiseUnexpectedTypeError("bytes", __pyx_v_s))) __PYX_ERR(0, 28, __pyx_L1_error) - __Pyx_INCREF(__pyx_v_s); - __pyx_r = ((PyObject*)__pyx_v_s); - goto __pyx_L0; - - /* "csimdjson.pyx":25 - * - * - * cdef bytes str_as_bytes(s): # <<<<<<<<<<<<<< - * if isinstance(s, unicode): - * return (s).encode('utf-8') - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("csimdjson.str_as_bytes", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":31 - * - * - * cdef dict object_to_dict(Parser p, simd_object obj, bint recursive): # <<<<<<<<<<<<<< - * cdef: - * dict result = {} - */ - -static PyObject *__pyx_f_9csimdjson_object_to_dict(struct __pyx_obj_9csimdjson_Parser *__pyx_v_p, simdjson::dom::object __pyx_v_obj, int __pyx_v_recursive) { - PyObject *__pyx_v_result = 0; - PyObject *__pyx_v_pyobj = 0; - size_t __pyx_v_size; - char const *__pyx_v_data; - simdjson::dom::object::iterator __pyx_v_it; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_t_2; - struct __pyx_opt_args_9csimdjson_element_to_primitive __pyx_t_3; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("object_to_dict", 0); - - /* "csimdjson.pyx":33 - * cdef dict object_to_dict(Parser p, simd_object obj, bint recursive): - * cdef: - * dict result = {} # <<<<<<<<<<<<<< - * object pyobj - * size_t size - */ - __pyx_t_1 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 33, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_v_result = ((PyObject*)__pyx_t_1); - __pyx_t_1 = 0; - - /* "csimdjson.pyx":37 - * size_t size - * const char *data - * simd_object.iterator it = obj.begin() # <<<<<<<<<<<<<< - * - * while it != obj.end(): - */ - __pyx_v_it = __pyx_v_obj.begin(); - - /* "csimdjson.pyx":39 - * simd_object.iterator it = obj.begin() - * - * while it != obj.end(): # <<<<<<<<<<<<<< - * pyobj = element_to_primitive(p, it.value(), recursive) - * - */ - while (1) { - __pyx_t_2 = (__pyx_v_it != __pyx_v_obj.end()); - if (!__pyx_t_2) break; - - /* "csimdjson.pyx":40 - * - * while it != obj.end(): - * pyobj = element_to_primitive(p, it.value(), recursive) # <<<<<<<<<<<<<< - * - * data = it.key_c_str() - */ - __pyx_t_3.__pyx_n = 1; - __pyx_t_3.recursive = __pyx_v_recursive; - __pyx_t_1 = __pyx_f_9csimdjson_element_to_primitive(__pyx_v_p, __pyx_v_it.value(), &__pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 40, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_XDECREF_SET(__pyx_v_pyobj, __pyx_t_1); - __pyx_t_1 = 0; - - /* "csimdjson.pyx":42 - * pyobj = element_to_primitive(p, it.value(), recursive) - * - * data = it.key_c_str() # <<<<<<<<<<<<<< - * size = it.key_length() - * - */ - __pyx_v_data = __pyx_v_it.key_c_str(); - - /* "csimdjson.pyx":43 - * - * data = it.key_c_str() - * size = it.key_length() # <<<<<<<<<<<<<< - * - * result[data[:size]] = pyobj - */ - __pyx_v_size = __pyx_v_it.key_length(); - - /* "csimdjson.pyx":45 - * size = it.key_length() - * - * result[data[:size]] = pyobj # <<<<<<<<<<<<<< - * preincrement(it) - * - */ - __pyx_t_1 = __Pyx_PyUnicode_FromStringAndSize(__pyx_v_data + 0, __pyx_v_size - 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 45, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - if (unlikely((PyDict_SetItem(__pyx_v_result, __pyx_t_1, __pyx_v_pyobj) < 0))) __PYX_ERR(0, 45, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "csimdjson.pyx":46 - * - * result[data[:size]] = pyobj - * preincrement(it) # <<<<<<<<<<<<<< - * - * return result - */ - (void)((++__pyx_v_it)); - } - - /* "csimdjson.pyx":48 - * preincrement(it) - * - * return result # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_result); - __pyx_r = __pyx_v_result; - goto __pyx_L0; - - /* "csimdjson.pyx":31 - * - * - * cdef dict object_to_dict(Parser p, simd_object obj, bint recursive): # <<<<<<<<<<<<<< - * cdef: - * dict result = {} - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("csimdjson.object_to_dict", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_result); - __Pyx_XDECREF(__pyx_v_pyobj); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":51 - * - * - * cdef list array_to_list(Parser p, simd_array arr, bint recursive): # <<<<<<<<<<<<<< - * cdef: - * list result = PyList_New(arr.size()) - */ - -static PyObject *__pyx_f_9csimdjson_array_to_list(struct __pyx_obj_9csimdjson_Parser *__pyx_v_p, simdjson::dom::array __pyx_v_arr, int __pyx_v_recursive) { - PyObject *__pyx_v_result = 0; - size_t __pyx_v_i; - simdjson::dom::element __pyx_v_element; - PyObject *__pyx_v_primitive = NULL; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - simdjson::dom::array::iterator __pyx_t_2; - simdjson::dom::element __pyx_t_3; - struct __pyx_opt_args_9csimdjson_element_to_primitive __pyx_t_4; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("array_to_list", 0); - - /* "csimdjson.pyx":53 - * cdef list array_to_list(Parser p, simd_array arr, bint recursive): - * cdef: - * list result = PyList_New(arr.size()) # <<<<<<<<<<<<<< - * size_t i = 0 - * - */ - __pyx_t_1 = PyList_New(__pyx_v_arr.size()); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 53, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_v_result = ((PyObject*)__pyx_t_1); - __pyx_t_1 = 0; - - /* "csimdjson.pyx":54 - * cdef: - * list result = PyList_New(arr.size()) - * size_t i = 0 # <<<<<<<<<<<<<< - * - * for element in arr: - */ - __pyx_v_i = 0; - - /* "csimdjson.pyx":56 - * size_t i = 0 - * - * for element in arr: # <<<<<<<<<<<<<< - * primitive = element_to_primitive(p, element, recursive) - * Py_INCREF(primitive) - */ - __pyx_t_2 = __pyx_v_arr.begin(); - for (;;) { - if (!(__pyx_t_2 != __pyx_v_arr.end())) break; - __pyx_t_3 = *__pyx_t_2; - ++__pyx_t_2; - __pyx_v_element = __PYX_STD_MOVE_IF_SUPPORTED(__pyx_t_3); - - /* "csimdjson.pyx":57 - * - * for element in arr: - * primitive = element_to_primitive(p, element, recursive) # <<<<<<<<<<<<<< - * Py_INCREF(primitive) - * PyList_SET_ITEM( - */ - __pyx_t_4.__pyx_n = 1; - __pyx_t_4.recursive = __pyx_v_recursive; - __pyx_t_1 = __pyx_f_9csimdjson_element_to_primitive(__pyx_v_p, __pyx_v_element, &__pyx_t_4); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 57, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_XDECREF_SET(__pyx_v_primitive, __pyx_t_1); - __pyx_t_1 = 0; - - /* "csimdjson.pyx":58 - * for element in arr: - * primitive = element_to_primitive(p, element, recursive) - * Py_INCREF(primitive) # <<<<<<<<<<<<<< - * PyList_SET_ITEM( - * result, - */ - Py_INCREF(__pyx_v_primitive); - - /* "csimdjson.pyx":59 - * primitive = element_to_primitive(p, element, recursive) - * Py_INCREF(primitive) - * PyList_SET_ITEM( # <<<<<<<<<<<<<< - * result, - * i, - */ - PyList_SET_ITEM(__pyx_v_result, __pyx_v_i, __pyx_v_primitive); - - /* "csimdjson.pyx":64 - * primitive - * ) - * i += 1 # <<<<<<<<<<<<<< - * - * return result - */ - __pyx_v_i = (__pyx_v_i + 1); - - /* "csimdjson.pyx":56 - * size_t i = 0 - * - * for element in arr: # <<<<<<<<<<<<<< - * primitive = element_to_primitive(p, element, recursive) - * Py_INCREF(primitive) - */ - } - - /* "csimdjson.pyx":66 - * i += 1 - * - * return result # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_result); - __pyx_r = __pyx_v_result; - goto __pyx_L0; - - /* "csimdjson.pyx":51 - * - * - * cdef list array_to_list(Parser p, simd_array arr, bint recursive): # <<<<<<<<<<<<<< - * cdef: - * list result = PyList_New(arr.size()) - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("csimdjson.array_to_list", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_result); - __Pyx_XDECREF(__pyx_v_primitive); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":69 - * - * - * cdef inline object element_to_primitive(Parser p, simd_element e, # <<<<<<<<<<<<<< - * bint recursive=False): - * cdef: - */ - -static CYTHON_INLINE PyObject *__pyx_f_9csimdjson_element_to_primitive(struct __pyx_obj_9csimdjson_Parser *__pyx_v_p, simdjson::dom::element __pyx_v_e, struct __pyx_opt_args_9csimdjson_element_to_primitive *__pyx_optional_args) { - - /* "csimdjson.pyx":70 - * - * cdef inline object element_to_primitive(Parser p, simd_element e, - * bint recursive=False): # <<<<<<<<<<<<<< - * cdef: - * const char *data - */ - int __pyx_v_recursive = ((int)0); - char const *__pyx_v_data; - size_t __pyx_v_size; - enum simdjson::dom::element_type __pyx_v_type_; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - enum simdjson::dom::element_type __pyx_t_1; - simdjson::dom::object __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - simdjson::dom::array __pyx_t_4; - char const *__pyx_t_5; - size_t __pyx_t_6; - int64_t __pyx_t_7; - uint64_t __pyx_t_8; - double __pyx_t_9; - int __pyx_t_10; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("element_to_primitive", 0); - if (__pyx_optional_args) { - if (__pyx_optional_args->__pyx_n > 0) { - __pyx_v_recursive = __pyx_optional_args->recursive; - } - } - - /* "csimdjson.pyx":74 - * const char *data - * size_t size - * element_type type_ = e.type() # <<<<<<<<<<<<<< - * - * if type_ == element_type.OBJECT: - */ - try { - __pyx_t_1 = __pyx_v_e.type(); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 74, __pyx_L1_error) - } - __pyx_v_type_ = __pyx_t_1; - - /* "csimdjson.pyx":76 - * element_type type_ = e.type() - * - * if type_ == element_type.OBJECT: # <<<<<<<<<<<<<< - * if recursive: - * return object_to_dict(p, e.get_object(), recursive) - */ - switch (__pyx_v_type_) { - case simdjson::dom::element_type::OBJECT: - - /* "csimdjson.pyx":77 - * - * if type_ == element_type.OBJECT: - * if recursive: # <<<<<<<<<<<<<< - * return object_to_dict(p, e.get_object(), recursive) - * return Object.from_element(p, e) - */ - if (__pyx_v_recursive) { - - /* "csimdjson.pyx":78 - * if type_ == element_type.OBJECT: - * if recursive: - * return object_to_dict(p, e.get_object(), recursive) # <<<<<<<<<<<<<< - * return Object.from_element(p, e) - * elif type_ == element_type.ARRAY: - */ - __Pyx_XDECREF(__pyx_r); - try { - __pyx_t_2 = __pyx_v_e.get_object(); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 78, __pyx_L1_error) - } - __pyx_t_3 = __pyx_f_9csimdjson_object_to_dict(__pyx_v_p, __PYX_STD_MOVE_IF_SUPPORTED(__pyx_t_2), __pyx_v_recursive); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 78, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":77 - * - * if type_ == element_type.OBJECT: - * if recursive: # <<<<<<<<<<<<<< - * return object_to_dict(p, e.get_object(), recursive) - * return Object.from_element(p, e) - */ - } - - /* "csimdjson.pyx":79 - * if recursive: - * return object_to_dict(p, e.get_object(), recursive) - * return Object.from_element(p, e) # <<<<<<<<<<<<<< - * elif type_ == element_type.ARRAY: - * if recursive: - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_3 = __pyx_f_9csimdjson_6Object_from_element(__pyx_v_p, __pyx_v_e); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 79, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":76 - * element_type type_ = e.type() - * - * if type_ == element_type.OBJECT: # <<<<<<<<<<<<<< - * if recursive: - * return object_to_dict(p, e.get_object(), recursive) - */ - break; - case simdjson::dom::element_type::ARRAY: - - /* "csimdjson.pyx":81 - * return Object.from_element(p, e) - * elif type_ == element_type.ARRAY: - * if recursive: # <<<<<<<<<<<<<< - * return array_to_list(p, e.get_array(), recursive) - * return Array.from_element(p, e) - */ - if (__pyx_v_recursive) { - - /* "csimdjson.pyx":82 - * elif type_ == element_type.ARRAY: - * if recursive: - * return array_to_list(p, e.get_array(), recursive) # <<<<<<<<<<<<<< - * return Array.from_element(p, e) - * elif type_ == element_type.STRING: - */ - __Pyx_XDECREF(__pyx_r); - try { - __pyx_t_4 = __pyx_v_e.get_array(); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 82, __pyx_L1_error) - } - __pyx_t_3 = __pyx_f_9csimdjson_array_to_list(__pyx_v_p, __PYX_STD_MOVE_IF_SUPPORTED(__pyx_t_4), __pyx_v_recursive); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 82, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":81 - * return Object.from_element(p, e) - * elif type_ == element_type.ARRAY: - * if recursive: # <<<<<<<<<<<<<< - * return array_to_list(p, e.get_array(), recursive) - * return Array.from_element(p, e) - */ - } - - /* "csimdjson.pyx":83 - * if recursive: - * return array_to_list(p, e.get_array(), recursive) - * return Array.from_element(p, e) # <<<<<<<<<<<<<< - * elif type_ == element_type.STRING: - * data = e.get_c_str() - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_3 = __pyx_f_9csimdjson_5Array_from_element(__pyx_v_p, __pyx_v_e); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 83, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":80 - * return object_to_dict(p, e.get_object(), recursive) - * return Object.from_element(p, e) - * elif type_ == element_type.ARRAY: # <<<<<<<<<<<<<< - * if recursive: - * return array_to_list(p, e.get_array(), recursive) - */ - break; - case simdjson::dom::element_type::STRING: - - /* "csimdjson.pyx":85 - * return Array.from_element(p, e) - * elif type_ == element_type.STRING: - * data = e.get_c_str() # <<<<<<<<<<<<<< - * size = e.get_string_length() - * return data[:size] - */ - try { - __pyx_t_5 = __pyx_v_e.get_c_str(); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 85, __pyx_L1_error) - } - __pyx_v_data = __pyx_t_5; - - /* "csimdjson.pyx":86 - * elif type_ == element_type.STRING: - * data = e.get_c_str() - * size = e.get_string_length() # <<<<<<<<<<<<<< - * return data[:size] - * elif type_ == element_type.INT64: - */ - try { - __pyx_t_6 = __pyx_v_e.get_string_length(); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 86, __pyx_L1_error) - } - __pyx_v_size = __pyx_t_6; - - /* "csimdjson.pyx":87 - * data = e.get_c_str() - * size = e.get_string_length() - * return data[:size] # <<<<<<<<<<<<<< - * elif type_ == element_type.INT64: - * return e.get_int64() - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_3 = __Pyx_PyUnicode_FromStringAndSize(__pyx_v_data + 0, __pyx_v_size - 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 87, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":84 - * return array_to_list(p, e.get_array(), recursive) - * return Array.from_element(p, e) - * elif type_ == element_type.STRING: # <<<<<<<<<<<<<< - * data = e.get_c_str() - * size = e.get_string_length() - */ - break; - case simdjson::dom::element_type::INT64: - - /* "csimdjson.pyx":89 - * return data[:size] - * elif type_ == element_type.INT64: - * return e.get_int64() # <<<<<<<<<<<<<< - * elif type_ == element_type.UINT64: - * return e.get_uint64() - */ - __Pyx_XDECREF(__pyx_r); - try { - __pyx_t_7 = __pyx_v_e.get_int64(); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 89, __pyx_L1_error) - } - __pyx_t_3 = __Pyx_PyInt_From_int64_t(__pyx_t_7); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 89, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":88 - * size = e.get_string_length() - * return data[:size] - * elif type_ == element_type.INT64: # <<<<<<<<<<<<<< - * return e.get_int64() - * elif type_ == element_type.UINT64: - */ - break; - case simdjson::dom::element_type::UINT64: - - /* "csimdjson.pyx":91 - * return e.get_int64() - * elif type_ == element_type.UINT64: - * return e.get_uint64() # <<<<<<<<<<<<<< - * elif type_ == element_type.DOUBLE: - * return e.get_double() - */ - __Pyx_XDECREF(__pyx_r); - try { - __pyx_t_8 = __pyx_v_e.get_uint64(); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 91, __pyx_L1_error) - } - __pyx_t_3 = __Pyx_PyInt_From_uint64_t(__pyx_t_8); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 91, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":90 - * elif type_ == element_type.INT64: - * return e.get_int64() - * elif type_ == element_type.UINT64: # <<<<<<<<<<<<<< - * return e.get_uint64() - * elif type_ == element_type.DOUBLE: - */ - break; - case simdjson::dom::element_type::DOUBLE: - - /* "csimdjson.pyx":93 - * return e.get_uint64() - * elif type_ == element_type.DOUBLE: - * return e.get_double() # <<<<<<<<<<<<<< - * elif type_ == element_type.BOOL: - * return e.get_bool() - */ - __Pyx_XDECREF(__pyx_r); - try { - __pyx_t_9 = __pyx_v_e.get_double(); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 93, __pyx_L1_error) - } - __pyx_t_3 = PyFloat_FromDouble(__pyx_t_9); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 93, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":92 - * elif type_ == element_type.UINT64: - * return e.get_uint64() - * elif type_ == element_type.DOUBLE: # <<<<<<<<<<<<<< - * return e.get_double() - * elif type_ == element_type.BOOL: - */ - break; - case simdjson::dom::element_type::BOOL: - - /* "csimdjson.pyx":95 - * return e.get_double() - * elif type_ == element_type.BOOL: - * return e.get_bool() # <<<<<<<<<<<<<< - * elif type_ == element_type.NULL_VALUE: - * return None - */ - __Pyx_XDECREF(__pyx_r); - try { - __pyx_t_10 = __pyx_v_e.get_bool(); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 95, __pyx_L1_error) - } - __pyx_t_3 = __Pyx_PyBool_FromLong(__pyx_t_10); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 95, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":94 - * elif type_ == element_type.DOUBLE: - * return e.get_double() - * elif type_ == element_type.BOOL: # <<<<<<<<<<<<<< - * return e.get_bool() - * elif type_ == element_type.NULL_VALUE: - */ - break; - case simdjson::dom::element_type::NULL_VALUE: - - /* "csimdjson.pyx":97 - * return e.get_bool() - * elif type_ == element_type.NULL_VALUE: - * return None # <<<<<<<<<<<<<< - * else: - * raise ValueError( # pragma: no cover - */ - __Pyx_XDECREF(__pyx_r); - __pyx_r = Py_None; __Pyx_INCREF(Py_None); - goto __pyx_L0; - - /* "csimdjson.pyx":96 - * elif type_ == element_type.BOOL: - * return e.get_bool() - * elif type_ == element_type.NULL_VALUE: # <<<<<<<<<<<<<< - * return None - * else: - */ - break; - default: - - /* "csimdjson.pyx":99 - * return None - * else: - * raise ValueError( # pragma: no cover # <<<<<<<<<<<<<< - * 'Encountered an unknown element_type.' - * ) - */ - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__9, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 99, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_Raise(__pyx_t_3, 0, 0, 0); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __PYX_ERR(0, 99, __pyx_L1_error) - break; - } - - /* "csimdjson.pyx":69 - * - * - * cdef inline object element_to_primitive(Parser p, simd_element e, # <<<<<<<<<<<<<< - * bint recursive=False): - * cdef: - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("csimdjson.element_to_primitive", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":123 - * cdef readonly size_t size - * - * def __cinit__(self): # <<<<<<<<<<<<<< - * self.buffer = NULL - * self.size = 0 - */ - -/* Python wrapper */ -static int __pyx_pw_9csimdjson_11ArrayBuffer_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static int __pyx_pw_9csimdjson_11ArrayBuffer_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0); - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(0, 123, __pyx_L3_error) - #endif - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("__cinit__", 1, 0, 0, __pyx_nargs); return -1;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_VARARGS(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__cinit__", 0))) return -1; - goto __pyx_L4_argument_unpacking_done; - goto __pyx_L3_error; - __pyx_L3_error:; - __Pyx_AddTraceback("csimdjson.ArrayBuffer.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return -1; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_11ArrayBuffer___cinit__(((struct __pyx_obj_9csimdjson_ArrayBuffer *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_9csimdjson_11ArrayBuffer___cinit__(struct __pyx_obj_9csimdjson_ArrayBuffer *__pyx_v_self) { - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__cinit__", 0); - - /* "csimdjson.pyx":124 - * - * def __cinit__(self): - * self.buffer = NULL # <<<<<<<<<<<<<< - * self.size = 0 - * - */ - __pyx_v_self->buffer = NULL; - - /* "csimdjson.pyx":125 - * def __cinit__(self): - * self.buffer = NULL - * self.size = 0 # <<<<<<<<<<<<<< - * - * def __dealloc__(self): - */ - __pyx_v_self->size = 0; - - /* "csimdjson.pyx":123 - * cdef readonly size_t size - * - * def __cinit__(self): # <<<<<<<<<<<<<< - * self.buffer = NULL - * self.size = 0 - */ - - /* function exit code */ - __pyx_r = 0; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":127 - * self.size = 0 - * - * def __dealloc__(self): # <<<<<<<<<<<<<< - * if self.buffer != NULL: - * PyMem_Free(self.buffer) - */ - -/* Python wrapper */ -static void __pyx_pw_9csimdjson_11ArrayBuffer_3__dealloc__(PyObject *__pyx_v_self); /*proto*/ -static void __pyx_pw_9csimdjson_11ArrayBuffer_3__dealloc__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__dealloc__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_pf_9csimdjson_11ArrayBuffer_2__dealloc__(((struct __pyx_obj_9csimdjson_ArrayBuffer *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); -} - -static void __pyx_pf_9csimdjson_11ArrayBuffer_2__dealloc__(struct __pyx_obj_9csimdjson_ArrayBuffer *__pyx_v_self) { - __Pyx_RefNannyDeclarations - int __pyx_t_1; - __Pyx_RefNannySetupContext("__dealloc__", 0); - - /* "csimdjson.pyx":128 - * - * def __dealloc__(self): - * if self.buffer != NULL: # <<<<<<<<<<<<<< - * PyMem_Free(self.buffer) - * - */ - __pyx_t_1 = (__pyx_v_self->buffer != NULL); - if (__pyx_t_1) { - - /* "csimdjson.pyx":129 - * def __dealloc__(self): - * if self.buffer != NULL: - * PyMem_Free(self.buffer) # <<<<<<<<<<<<<< - * - * @staticmethod - */ - PyMem_Free(__pyx_v_self->buffer); - - /* "csimdjson.pyx":128 - * - * def __dealloc__(self): - * if self.buffer != NULL: # <<<<<<<<<<<<<< - * PyMem_Free(self.buffer) - * - */ - } - - /* "csimdjson.pyx":127 - * self.size = 0 - * - * def __dealloc__(self): # <<<<<<<<<<<<<< - * if self.buffer != NULL: - * PyMem_Free(self.buffer) - */ - - /* function exit code */ - __Pyx_RefNannyFinishContext(); -} - -/* "csimdjson.pyx":132 - * - * @staticmethod - * cdef inline from_element(simd_array src, of_type): # <<<<<<<<<<<<<< - * cdef: - * ArrayBuffer self = ArrayBuffer.__new__(ArrayBuffer) - */ - -static CYTHON_INLINE PyObject *__pyx_f_9csimdjson_11ArrayBuffer_from_element(simdjson::dom::array __pyx_v_src, PyObject *__pyx_v_of_type) { - struct __pyx_obj_9csimdjson_ArrayBuffer *__pyx_v_self = 0; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_t_2; - void *__pyx_t_3; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("from_element", 0); - - /* "csimdjson.pyx":134 - * cdef inline from_element(simd_array src, of_type): - * cdef: - * ArrayBuffer self = ArrayBuffer.__new__(ArrayBuffer) # <<<<<<<<<<<<<< - * - * if of_type == 'd': - */ - __pyx_t_1 = ((PyObject *)__pyx_tp_new_9csimdjson_ArrayBuffer(((PyTypeObject *)__pyx_ptype_9csimdjson_ArrayBuffer), __pyx_empty_tuple, NULL)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 134, __pyx_L1_error) - __Pyx_GOTREF((PyObject *)__pyx_t_1); - __pyx_v_self = ((struct __pyx_obj_9csimdjson_ArrayBuffer *)__pyx_t_1); - __pyx_t_1 = 0; - - /* "csimdjson.pyx":136 - * ArrayBuffer self = ArrayBuffer.__new__(ArrayBuffer) - * - * if of_type == 'd': # <<<<<<<<<<<<<< - * self.buffer = flatten_array[double](src, &self.size) - * elif of_type == 'i': - */ - __pyx_t_2 = (__Pyx_PyUnicode_Equals(__pyx_v_of_type, __pyx_n_u_d, Py_EQ)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(0, 136, __pyx_L1_error) - if (__pyx_t_2) { - - /* "csimdjson.pyx":137 - * - * if of_type == 'd': - * self.buffer = flatten_array[double](src, &self.size) # <<<<<<<<<<<<<< - * elif of_type == 'i': - * self.buffer = flatten_array[int64_t](src, &self.size) - */ - try { - __pyx_t_3 = flatten_array(__pyx_v_src, (&__pyx_v_self->size)); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 137, __pyx_L1_error) - } - __pyx_v_self->buffer = __pyx_t_3; - - /* "csimdjson.pyx":136 - * ArrayBuffer self = ArrayBuffer.__new__(ArrayBuffer) - * - * if of_type == 'd': # <<<<<<<<<<<<<< - * self.buffer = flatten_array[double](src, &self.size) - * elif of_type == 'i': - */ - goto __pyx_L3; - } - - /* "csimdjson.pyx":138 - * if of_type == 'd': - * self.buffer = flatten_array[double](src, &self.size) - * elif of_type == 'i': # <<<<<<<<<<<<<< - * self.buffer = flatten_array[int64_t](src, &self.size) - * elif of_type == 'u': - */ - __pyx_t_2 = (__Pyx_PyUnicode_Equals(__pyx_v_of_type, __pyx_n_u_i, Py_EQ)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(0, 138, __pyx_L1_error) - if (__pyx_t_2) { - - /* "csimdjson.pyx":139 - * self.buffer = flatten_array[double](src, &self.size) - * elif of_type == 'i': - * self.buffer = flatten_array[int64_t](src, &self.size) # <<<<<<<<<<<<<< - * elif of_type == 'u': - * self.buffer = flatten_array[uint64_t](src, &self.size) - */ - try { - __pyx_t_3 = flatten_array(__pyx_v_src, (&__pyx_v_self->size)); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 139, __pyx_L1_error) - } - __pyx_v_self->buffer = __pyx_t_3; - - /* "csimdjson.pyx":138 - * if of_type == 'd': - * self.buffer = flatten_array[double](src, &self.size) - * elif of_type == 'i': # <<<<<<<<<<<<<< - * self.buffer = flatten_array[int64_t](src, &self.size) - * elif of_type == 'u': - */ - goto __pyx_L3; - } - - /* "csimdjson.pyx":140 - * elif of_type == 'i': - * self.buffer = flatten_array[int64_t](src, &self.size) - * elif of_type == 'u': # <<<<<<<<<<<<<< - * self.buffer = flatten_array[uint64_t](src, &self.size) - * else: - */ - __pyx_t_2 = (__Pyx_PyUnicode_Equals(__pyx_v_of_type, __pyx_n_u_u, Py_EQ)); if (unlikely((__pyx_t_2 < 0))) __PYX_ERR(0, 140, __pyx_L1_error) - if (likely(__pyx_t_2)) { - - /* "csimdjson.pyx":141 - * self.buffer = flatten_array[int64_t](src, &self.size) - * elif of_type == 'u': - * self.buffer = flatten_array[uint64_t](src, &self.size) # <<<<<<<<<<<<<< - * else: - * raise ValueError('of_type must be one of {d,i,u}.') - */ - try { - __pyx_t_3 = flatten_array(__pyx_v_src, (&__pyx_v_self->size)); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 141, __pyx_L1_error) - } - __pyx_v_self->buffer = __pyx_t_3; - - /* "csimdjson.pyx":140 - * elif of_type == 'i': - * self.buffer = flatten_array[int64_t](src, &self.size) - * elif of_type == 'u': # <<<<<<<<<<<<<< - * self.buffer = flatten_array[uint64_t](src, &self.size) - * else: - */ - goto __pyx_L3; - } - - /* "csimdjson.pyx":143 - * self.buffer = flatten_array[uint64_t](src, &self.size) - * else: - * raise ValueError('of_type must be one of {d,i,u}.') # <<<<<<<<<<<<<< - * - * if not self.buffer: - */ - /*else*/ { - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__10, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 143, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __Pyx_Raise(__pyx_t_1, 0, 0, 0); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __PYX_ERR(0, 143, __pyx_L1_error) - } - __pyx_L3:; - - /* "csimdjson.pyx":145 - * raise ValueError('of_type must be one of {d,i,u}.') - * - * if not self.buffer: # <<<<<<<<<<<<<< - * raise MemoryError() # pragma: no cover - * - */ - __pyx_t_2 = (!(__pyx_v_self->buffer != 0)); - if (unlikely(__pyx_t_2)) { - - /* "csimdjson.pyx":146 - * - * if not self.buffer: - * raise MemoryError() # pragma: no cover # <<<<<<<<<<<<<< - * - * return self - */ - PyErr_NoMemory(); __PYX_ERR(0, 146, __pyx_L1_error) - - /* "csimdjson.pyx":145 - * raise ValueError('of_type must be one of {d,i,u}.') - * - * if not self.buffer: # <<<<<<<<<<<<<< - * raise MemoryError() # pragma: no cover - * - */ - } - - /* "csimdjson.pyx":148 - * raise MemoryError() # pragma: no cover - * - * return self # <<<<<<<<<<<<<< - * - * def __getbuffer__(self, Py_buffer *buffer, int flags): - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF((PyObject *)__pyx_v_self); - __pyx_r = ((PyObject *)__pyx_v_self); - goto __pyx_L0; - - /* "csimdjson.pyx":132 - * - * @staticmethod - * cdef inline from_element(simd_array src, of_type): # <<<<<<<<<<<<<< - * cdef: - * ArrayBuffer self = ArrayBuffer.__new__(ArrayBuffer) - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("csimdjson.ArrayBuffer.from_element", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF((PyObject *)__pyx_v_self); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":150 - * return self - * - * def __getbuffer__(self, Py_buffer *buffer, int flags): # <<<<<<<<<<<<<< - * PyBuffer_FillInfo(buffer, self, self.buffer, self.size, 0, flags) - * - */ - -/* Python wrapper */ -CYTHON_UNUSED static int __pyx_pw_9csimdjson_11ArrayBuffer_5__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_buffer, int __pyx_v_flags); /*proto*/ -CYTHON_UNUSED static int __pyx_pw_9csimdjson_11ArrayBuffer_5__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_buffer, int __pyx_v_flags) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__getbuffer__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_9csimdjson_11ArrayBuffer_4__getbuffer__(((struct __pyx_obj_9csimdjson_ArrayBuffer *)__pyx_v_self), ((Py_buffer *)__pyx_v_buffer), ((int)__pyx_v_flags)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_9csimdjson_11ArrayBuffer_4__getbuffer__(struct __pyx_obj_9csimdjson_ArrayBuffer *__pyx_v_self, Py_buffer *__pyx_v_buffer, int __pyx_v_flags) { - int __pyx_r; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - if (unlikely(__pyx_v_buffer == NULL)) { - PyErr_SetString(PyExc_BufferError, "PyObject_GetBuffer: view==NULL argument is obsolete"); - return -1; - } - __Pyx_RefNannySetupContext("__getbuffer__", 0); - __pyx_v_buffer->obj = Py_None; __Pyx_INCREF(Py_None); - __Pyx_GIVEREF(__pyx_v_buffer->obj); - - /* "csimdjson.pyx":151 - * - * def __getbuffer__(self, Py_buffer *buffer, int flags): - * PyBuffer_FillInfo(buffer, self, self.buffer, self.size, 0, flags) # <<<<<<<<<<<<<< - * - * def __releasebuffer__(self, Py_buffer *buffer): - */ - __pyx_t_1 = PyBuffer_FillInfo(__pyx_v_buffer, ((PyObject *)__pyx_v_self), __pyx_v_self->buffer, __pyx_v_self->size, 0, __pyx_v_flags); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(0, 151, __pyx_L1_error) - - /* "csimdjson.pyx":150 - * return self - * - * def __getbuffer__(self, Py_buffer *buffer, int flags): # <<<<<<<<<<<<<< - * PyBuffer_FillInfo(buffer, self, self.buffer, self.size, 0, flags) - * - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("csimdjson.ArrayBuffer.__getbuffer__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - if (__pyx_v_buffer->obj != NULL) { - __Pyx_GOTREF(__pyx_v_buffer->obj); - __Pyx_DECREF(__pyx_v_buffer->obj); __pyx_v_buffer->obj = 0; - } - goto __pyx_L2; - __pyx_L0:; - if (__pyx_v_buffer->obj == Py_None) { - __Pyx_GOTREF(__pyx_v_buffer->obj); - __Pyx_DECREF(__pyx_v_buffer->obj); __pyx_v_buffer->obj = 0; - } - __pyx_L2:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":153 - * PyBuffer_FillInfo(buffer, self, self.buffer, self.size, 0, flags) - * - * def __releasebuffer__(self, Py_buffer *buffer): # <<<<<<<<<<<<<< - * pass - * - */ - -/* Python wrapper */ -CYTHON_UNUSED static void __pyx_pw_9csimdjson_11ArrayBuffer_7__releasebuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_buffer); /*proto*/ -CYTHON_UNUSED static void __pyx_pw_9csimdjson_11ArrayBuffer_7__releasebuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_buffer) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__releasebuffer__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_pf_9csimdjson_11ArrayBuffer_6__releasebuffer__(((struct __pyx_obj_9csimdjson_ArrayBuffer *)__pyx_v_self), ((Py_buffer *)__pyx_v_buffer)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); -} - -static void __pyx_pf_9csimdjson_11ArrayBuffer_6__releasebuffer__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_ArrayBuffer *__pyx_v_self, CYTHON_UNUSED Py_buffer *__pyx_v_buffer) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__releasebuffer__", 0); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); -} - -/* "csimdjson.pyx":121 - * """ - * cdef void *buffer - * cdef readonly size_t size # <<<<<<<<<<<<<< - * - * def __cinit__(self): - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_11ArrayBuffer_4size_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_9csimdjson_11ArrayBuffer_4size_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_9csimdjson_11ArrayBuffer_4size___get__(((struct __pyx_obj_9csimdjson_ArrayBuffer *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_11ArrayBuffer_4size___get__(struct __pyx_obj_9csimdjson_ArrayBuffer *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 0); - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_FromSize_t(__pyx_v_self->size); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 121, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("csimdjson.ArrayBuffer.size.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_11ArrayBuffer_9__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_11ArrayBuffer_8__reduce_cython__, "ArrayBuffer.__reduce_cython__(self)"); -static PyMethodDef __pyx_mdef_9csimdjson_11ArrayBuffer_9__reduce_cython__ = {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_11ArrayBuffer_9__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_11ArrayBuffer_8__reduce_cython__}; -static PyObject *__pyx_pw_9csimdjson_11ArrayBuffer_9__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__reduce_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 1, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("__reduce_cython__", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__reduce_cython__", 0))) return NULL; - goto __pyx_L4_argument_unpacking_done; - goto __pyx_L3_error; - __pyx_L3_error:; - __Pyx_AddTraceback("csimdjson.ArrayBuffer.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_11ArrayBuffer_8__reduce_cython__(((struct __pyx_obj_9csimdjson_ArrayBuffer *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_11ArrayBuffer_8__reduce_cython__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_ArrayBuffer *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__reduce_cython__", 0); - - /* "(tree fragment)":2 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<< - * def __setstate_cython__(self, __pyx_state): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); - __PYX_ERR(1, 2, __pyx_L1_error) - - /* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("csimdjson.ArrayBuffer.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_11ArrayBuffer_11__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_11ArrayBuffer_10__setstate_cython__, "ArrayBuffer.__setstate_cython__(self, __pyx_state)"); -static PyMethodDef __pyx_mdef_9csimdjson_11ArrayBuffer_11__setstate_cython__ = {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_11ArrayBuffer_11__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_11ArrayBuffer_10__setstate_cython__}; -static PyObject *__pyx_pw_9csimdjson_11ArrayBuffer_11__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - CYTHON_UNUSED PyObject *__pyx_v___pyx_state = 0; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__setstate_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 3, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_pyx_state,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pyx_state)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 3, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__setstate_cython__") < 0)) __PYX_ERR(1, 3, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 1)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - } - __pyx_v___pyx_state = values[0]; - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__setstate_cython__", 1, 1, 1, __pyx_nargs); __PYX_ERR(1, 3, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("csimdjson.ArrayBuffer.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_11ArrayBuffer_10__setstate_cython__(((struct __pyx_obj_9csimdjson_ArrayBuffer *)__pyx_v_self), __pyx_v___pyx_state); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_11ArrayBuffer_10__setstate_cython__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_ArrayBuffer *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__setstate_cython__", 0); - - /* "(tree fragment)":4 - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<< - */ - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); - __PYX_ERR(1, 4, __pyx_L1_error) - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("csimdjson.ArrayBuffer.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":170 - * - * @staticmethod - * cdef inline from_element(Parser parser, simd_element src): # <<<<<<<<<<<<<< - * cdef Array self = Array.__new__(Array) - * self.parser = parser - */ - -static CYTHON_INLINE PyObject *__pyx_f_9csimdjson_5Array_from_element(struct __pyx_obj_9csimdjson_Parser *__pyx_v_parser, simdjson::dom::element __pyx_v_src) { - struct __pyx_obj_9csimdjson_Array *__pyx_v_self = 0; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - simdjson::dom::array __pyx_t_2; - std::shared_ptr __pyx_t_3; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("from_element", 0); - - /* "csimdjson.pyx":171 - * @staticmethod - * cdef inline from_element(Parser parser, simd_element src): - * cdef Array self = Array.__new__(Array) # <<<<<<<<<<<<<< - * self.parser = parser - * self.c_element = src.get_array() - */ - __pyx_t_1 = ((PyObject *)__pyx_tp_new_9csimdjson_Array(((PyTypeObject *)__pyx_ptype_9csimdjson_Array), __pyx_empty_tuple, NULL)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 171, __pyx_L1_error) - __Pyx_GOTREF((PyObject *)__pyx_t_1); - __pyx_v_self = ((struct __pyx_obj_9csimdjson_Array *)__pyx_t_1); - __pyx_t_1 = 0; - - /* "csimdjson.pyx":172 - * cdef inline from_element(Parser parser, simd_element src): - * cdef Array self = Array.__new__(Array) - * self.parser = parser # <<<<<<<<<<<<<< - * self.c_element = src.get_array() - * self.c_parser = parser.c_parser - */ - __Pyx_INCREF((PyObject *)__pyx_v_parser); - __Pyx_GIVEREF((PyObject *)__pyx_v_parser); - __Pyx_GOTREF((PyObject *)__pyx_v_self->parser); - __Pyx_DECREF((PyObject *)__pyx_v_self->parser); - __pyx_v_self->parser = __pyx_v_parser; - - /* "csimdjson.pyx":173 - * cdef Array self = Array.__new__(Array) - * self.parser = parser - * self.c_element = src.get_array() # <<<<<<<<<<<<<< - * self.c_parser = parser.c_parser - * return self - */ - try { - __pyx_t_2 = __pyx_v_src.get_array(); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 173, __pyx_L1_error) - } - __pyx_v_self->c_element = __PYX_STD_MOVE_IF_SUPPORTED(__pyx_t_2); - - /* "csimdjson.pyx":174 - * self.parser = parser - * self.c_element = src.get_array() - * self.c_parser = parser.c_parser # <<<<<<<<<<<<<< - * return self - * - */ - __pyx_t_3 = __pyx_v_parser->c_parser; - __pyx_v_self->c_parser = __PYX_STD_MOVE_IF_SUPPORTED(__pyx_t_3); - - /* "csimdjson.pyx":175 - * self.c_element = src.get_array() - * self.c_parser = parser.c_parser - * return self # <<<<<<<<<<<<<< - * - * def __getitem__(self, key): - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF((PyObject *)__pyx_v_self); - __pyx_r = ((PyObject *)__pyx_v_self); - goto __pyx_L0; - - /* "csimdjson.pyx":170 - * - * @staticmethod - * cdef inline from_element(Parser parser, simd_element src): # <<<<<<<<<<<<<< - * cdef Array self = Array.__new__(Array) - * self.parser = parser - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("csimdjson.Array.from_element", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF((PyObject *)__pyx_v_self); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":177 - * return self - * - * def __getitem__(self, key): # <<<<<<<<<<<<<< - * cdef: - * Py_ssize_t start = 0, stop = 0, step = 0, slice_length = 0 - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_5Array_1__getitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_key); /*proto*/ -static PyObject *__pyx_pw_9csimdjson_5Array_1__getitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_key) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__getitem__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_9csimdjson_5Array___getitem__(((struct __pyx_obj_9csimdjson_Array *)__pyx_v_self), ((PyObject *)__pyx_v_key)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_5Array___getitem__(struct __pyx_obj_9csimdjson_Array *__pyx_v_self, PyObject *__pyx_v_key) { - Py_ssize_t __pyx_v_start; - Py_ssize_t __pyx_v_stop; - Py_ssize_t __pyx_v_step; - Py_ssize_t __pyx_v_slice_length; - Py_ssize_t __pyx_v_dst; - Py_ssize_t __pyx_v_src; - PyObject *__pyx_v_result = 0; - PyObject *__pyx_v_primitive = NULL; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - int __pyx_t_2; - PyObject *__pyx_t_3 = NULL; - Py_ssize_t __pyx_t_4; - PyObject *__pyx_t_5 = NULL; - PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - Py_ssize_t __pyx_t_8; - PyObject *(*__pyx_t_9)(PyObject *); - Py_ssize_t __pyx_t_10; - simdjson::dom::element __pyx_t_11; - struct __pyx_opt_args_9csimdjson_element_to_primitive __pyx_t_12; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__getitem__", 0); - __Pyx_INCREF(__pyx_v_key); - - /* "csimdjson.pyx":179 - * def __getitem__(self, key): - * cdef: - * Py_ssize_t start = 0, stop = 0, step = 0, slice_length = 0 # <<<<<<<<<<<<<< - * Py_ssize_t dst, src - * list result - */ - __pyx_v_start = 0; - __pyx_v_stop = 0; - __pyx_v_step = 0; - __pyx_v_slice_length = 0; - - /* "csimdjson.pyx":183 - * list result - * - * if isinstance(key, slice): # <<<<<<<<<<<<<< - * PySlice_GetIndicesEx( - * key, - */ - __pyx_t_1 = PySlice_Check(__pyx_v_key); - if (__pyx_t_1) { - - /* "csimdjson.pyx":184 - * - * if isinstance(key, slice): - * PySlice_GetIndicesEx( # <<<<<<<<<<<<<< - * key, - * self.c_element.size(), - */ - __pyx_t_2 = PySlice_GetIndicesEx(__pyx_v_key, __pyx_v_self->c_element.size(), (&__pyx_v_start), (&__pyx_v_stop), (&__pyx_v_step), (&__pyx_v_slice_length)); if (unlikely(__pyx_t_2 == ((int)-1))) __PYX_ERR(0, 184, __pyx_L1_error) - - /* "csimdjson.pyx":193 - * ) - * - * result = PyList_New(slice_length) # <<<<<<<<<<<<<< - * for dst, src in enumerate(range(start, stop, step)): - * primitive = element_to_primitive( - */ - __pyx_t_3 = PyList_New(__pyx_v_slice_length); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 193, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_v_result = ((PyObject*)__pyx_t_3); - __pyx_t_3 = 0; - - /* "csimdjson.pyx":194 - * - * result = PyList_New(slice_length) - * for dst, src in enumerate(range(start, stop, step)): # <<<<<<<<<<<<<< - * primitive = element_to_primitive( - * self.parser, - */ - __pyx_t_4 = 0; - __pyx_t_3 = PyInt_FromSsize_t(__pyx_v_start); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 194, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = PyInt_FromSsize_t(__pyx_v_stop); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 194, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - __pyx_t_6 = PyInt_FromSsize_t(__pyx_v_step); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 194, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = PyTuple_New(3); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 194, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __Pyx_GIVEREF(__pyx_t_3); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_3)) __PYX_ERR(0, 194, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_5); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_t_5)) __PYX_ERR(0, 194, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_6); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_7, 2, __pyx_t_6)) __PYX_ERR(0, 194, __pyx_L1_error); - __pyx_t_3 = 0; - __pyx_t_5 = 0; - __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_builtin_range, __pyx_t_7, NULL); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 194, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - if (likely(PyList_CheckExact(__pyx_t_6)) || PyTuple_CheckExact(__pyx_t_6)) { - __pyx_t_7 = __pyx_t_6; __Pyx_INCREF(__pyx_t_7); __pyx_t_8 = 0; - __pyx_t_9 = NULL; - } else { - __pyx_t_8 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 194, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_9 = __Pyx_PyObject_GetIterNextFunc(__pyx_t_7); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 194, __pyx_L1_error) - } - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - for (;;) { - if (likely(!__pyx_t_9)) { - if (likely(PyList_CheckExact(__pyx_t_7))) { - if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_7)) break; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_8); __Pyx_INCREF(__pyx_t_6); __pyx_t_8++; if (unlikely((0 < 0))) __PYX_ERR(0, 194, __pyx_L1_error) - #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 194, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - #endif - } else { - if (__pyx_t_8 >= PyTuple_GET_SIZE(__pyx_t_7)) break; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_8); __Pyx_INCREF(__pyx_t_6); __pyx_t_8++; if (unlikely((0 < 0))) __PYX_ERR(0, 194, __pyx_L1_error) - #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 194, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - #endif - } - } else { - __pyx_t_6 = __pyx_t_9(__pyx_t_7); - if (unlikely(!__pyx_t_6)) { - PyObject* exc_type = PyErr_Occurred(); - if (exc_type) { - if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 194, __pyx_L1_error) - } - break; - } - __Pyx_GOTREF(__pyx_t_6); - } - __pyx_t_10 = __Pyx_PyIndex_AsSsize_t(__pyx_t_6); if (unlikely((__pyx_t_10 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 194, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_v_src = __pyx_t_10; - __pyx_v_dst = __pyx_t_4; - __pyx_t_4 = (__pyx_t_4 + 1); - - /* "csimdjson.pyx":196 - * for dst, src in enumerate(range(start, stop, step)): - * primitive = element_to_primitive( - * self.parser, # <<<<<<<<<<<<<< - * self.c_element.at(src), - * True - */ - __pyx_t_6 = ((PyObject *)__pyx_v_self->parser); - __Pyx_INCREF(__pyx_t_6); - - /* "csimdjson.pyx":197 - * primitive = element_to_primitive( - * self.parser, - * self.c_element.at(src), # <<<<<<<<<<<<<< - * True - * ) - */ - try { - __pyx_t_11 = __pyx_v_self->c_element.at(__pyx_v_src); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 197, __pyx_L1_error) - } - - /* "csimdjson.pyx":195 - * result = PyList_New(slice_length) - * for dst, src in enumerate(range(start, stop, step)): - * primitive = element_to_primitive( # <<<<<<<<<<<<<< - * self.parser, - * self.c_element.at(src), - */ - __pyx_t_12.__pyx_n = 1; - __pyx_t_12.recursive = 1; - __pyx_t_5 = __pyx_f_9csimdjson_element_to_primitive(((struct __pyx_obj_9csimdjson_Parser *)__pyx_t_6), __PYX_STD_MOVE_IF_SUPPORTED(__pyx_t_11), &__pyx_t_12); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 195, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __Pyx_XDECREF_SET(__pyx_v_primitive, __pyx_t_5); - __pyx_t_5 = 0; - - /* "csimdjson.pyx":200 - * True - * ) - * Py_INCREF(primitive) # <<<<<<<<<<<<<< - * PyList_SET_ITEM( - * result, - */ - Py_INCREF(__pyx_v_primitive); - - /* "csimdjson.pyx":201 - * ) - * Py_INCREF(primitive) - * PyList_SET_ITEM( # <<<<<<<<<<<<<< - * result, - * dst, - */ - PyList_SET_ITEM(__pyx_v_result, __pyx_v_dst, __pyx_v_primitive); - - /* "csimdjson.pyx":194 - * - * result = PyList_New(slice_length) - * for dst, src in enumerate(range(start, stop, step)): # <<<<<<<<<<<<<< - * primitive = element_to_primitive( - * self.parser, - */ - } - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - - /* "csimdjson.pyx":207 - * ) - * - * return result # <<<<<<<<<<<<<< - * elif isinstance(key, int): - * # Wrap around negative indexes. - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_result); - __pyx_r = __pyx_v_result; - goto __pyx_L0; - - /* "csimdjson.pyx":183 - * list result - * - * if isinstance(key, slice): # <<<<<<<<<<<<<< - * PySlice_GetIndicesEx( - * key, - */ - } - - /* "csimdjson.pyx":208 - * - * return result - * elif isinstance(key, int): # <<<<<<<<<<<<<< - * # Wrap around negative indexes. - * if key < 0: - */ - __pyx_t_1 = PyInt_Check(__pyx_v_key); - if (__pyx_t_1) { - - /* "csimdjson.pyx":210 - * elif isinstance(key, int): - * # Wrap around negative indexes. - * if key < 0: # <<<<<<<<<<<<<< - * key += self.c_element.size() - * - */ - __pyx_t_7 = PyObject_RichCompare(__pyx_v_key, __pyx_int_0, Py_LT); __Pyx_XGOTREF(__pyx_t_7); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 210, __pyx_L1_error) - __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 210, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - if (__pyx_t_1) { - - /* "csimdjson.pyx":211 - * # Wrap around negative indexes. - * if key < 0: - * key += self.c_element.size() # <<<<<<<<<<<<<< - * - * return element_to_primitive(self.parser, self.c_element.at(key)) - */ - __pyx_t_7 = __Pyx_PyInt_FromSize_t(__pyx_v_self->c_element.size()); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 211, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_5 = PyNumber_InPlaceAdd(__pyx_v_key, __pyx_t_7); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 211, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __Pyx_DECREF_SET(__pyx_v_key, __pyx_t_5); - __pyx_t_5 = 0; - - /* "csimdjson.pyx":210 - * elif isinstance(key, int): - * # Wrap around negative indexes. - * if key < 0: # <<<<<<<<<<<<<< - * key += self.c_element.size() - * - */ - } - - /* "csimdjson.pyx":208 - * - * return result - * elif isinstance(key, int): # <<<<<<<<<<<<<< - * # Wrap around negative indexes. - * if key < 0: - */ - } - - /* "csimdjson.pyx":213 - * key += self.c_element.size() - * - * return element_to_primitive(self.parser, self.c_element.at(key)) # <<<<<<<<<<<<<< - * - * def __len__(self): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_5 = ((PyObject *)__pyx_v_self->parser); - __Pyx_INCREF(__pyx_t_5); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_key); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 213, __pyx_L1_error) - try { - __pyx_t_11 = __pyx_v_self->c_element.at(__pyx_t_2); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 213, __pyx_L1_error) - } - __pyx_t_7 = __pyx_f_9csimdjson_element_to_primitive(((struct __pyx_obj_9csimdjson_Parser *)__pyx_t_5), __PYX_STD_MOVE_IF_SUPPORTED(__pyx_t_11), NULL); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 213, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_r = __pyx_t_7; - __pyx_t_7 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":177 - * return self - * - * def __getitem__(self, key): # <<<<<<<<<<<<<< - * cdef: - * Py_ssize_t start = 0, stop = 0, step = 0, slice_length = 0 - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_5); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_AddTraceback("csimdjson.Array.__getitem__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_result); - __Pyx_XDECREF(__pyx_v_primitive); - __Pyx_XDECREF(__pyx_v_key); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":215 - * return element_to_primitive(self.parser, self.c_element.at(key)) - * - * def __len__(self): # <<<<<<<<<<<<<< - * return self.c_element.size() - * - */ - -/* Python wrapper */ -static Py_ssize_t __pyx_pw_9csimdjson_5Array_3__len__(PyObject *__pyx_v_self); /*proto*/ -static Py_ssize_t __pyx_pw_9csimdjson_5Array_3__len__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - Py_ssize_t __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__len__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_9csimdjson_5Array_2__len__(((struct __pyx_obj_9csimdjson_Array *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static Py_ssize_t __pyx_pf_9csimdjson_5Array_2__len__(struct __pyx_obj_9csimdjson_Array *__pyx_v_self) { - Py_ssize_t __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__len__", 0); - - /* "csimdjson.pyx":216 - * - * def __len__(self): - * return self.c_element.size() # <<<<<<<<<<<<<< - * - * def __iter__(self): - */ - __pyx_r = __pyx_v_self->c_element.size(); - goto __pyx_L0; - - /* "csimdjson.pyx":215 - * return element_to_primitive(self.parser, self.c_element.at(key)) - * - * def __len__(self): # <<<<<<<<<<<<<< - * return self.c_element.size() - * - */ - - /* function exit code */ - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} -static PyObject *__pyx_gb_9csimdjson_5Array_6generator(__pyx_CoroutineObject *__pyx_generator, CYTHON_UNUSED PyThreadState *__pyx_tstate, PyObject *__pyx_sent_value); /* proto */ - -/* "csimdjson.pyx":218 - * return self.c_element.size() - * - * def __iter__(self): # <<<<<<<<<<<<<< - * cdef simd_array.iterator it = self.c_element.begin() - * while it != self.c_element.end(): - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_5Array_5__iter__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_9csimdjson_5Array_5__iter__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__iter__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_9csimdjson_5Array_4__iter__(((struct __pyx_obj_9csimdjson_Array *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_5Array_4__iter__(struct __pyx_obj_9csimdjson_Array *__pyx_v_self) { - struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__ *__pyx_cur_scope; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__iter__", 0); - __pyx_cur_scope = (struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__ *)__pyx_tp_new_9csimdjson___pyx_scope_struct____iter__(__pyx_ptype_9csimdjson___pyx_scope_struct____iter__, __pyx_empty_tuple, NULL); - if (unlikely(!__pyx_cur_scope)) { - __pyx_cur_scope = ((struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__ *)Py_None); - __Pyx_INCREF(Py_None); - __PYX_ERR(0, 218, __pyx_L1_error) - } else { - __Pyx_GOTREF((PyObject *)__pyx_cur_scope); - } - __pyx_cur_scope->__pyx_v_self = __pyx_v_self; - __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self); - __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); - { - __pyx_CoroutineObject *gen = __Pyx_Generator_New((__pyx_coroutine_body_t) __pyx_gb_9csimdjson_5Array_6generator, NULL, (PyObject *) __pyx_cur_scope, __pyx_n_s_iter, __pyx_n_s_Array___iter, __pyx_n_s_csimdjson); if (unlikely(!gen)) __PYX_ERR(0, 218, __pyx_L1_error) - __Pyx_DECREF(__pyx_cur_scope); - __Pyx_RefNannyFinishContext(); - return (PyObject *) gen; - } - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("csimdjson.Array.__iter__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_DECREF((PyObject *)__pyx_cur_scope); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_gb_9csimdjson_5Array_6generator(__pyx_CoroutineObject *__pyx_generator, CYTHON_UNUSED PyThreadState *__pyx_tstate, PyObject *__pyx_sent_value) /* generator body */ -{ - struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__ *__pyx_cur_scope = ((struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__ *)__pyx_generator->closure); - PyObject *__pyx_r = NULL; - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - struct __pyx_opt_args_9csimdjson_element_to_primitive __pyx_t_4; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__iter__", 0); - switch (__pyx_generator->resume_label) { - case 0: goto __pyx_L3_first_run; - case 1: goto __pyx_L6_resume_from_yield; - default: /* CPython raises the right error here */ - __Pyx_RefNannyFinishContext(); - return NULL; - } - __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) __PYX_ERR(0, 218, __pyx_L1_error) - - /* "csimdjson.pyx":219 - * - * def __iter__(self): - * cdef simd_array.iterator it = self.c_element.begin() # <<<<<<<<<<<<<< - * while it != self.c_element.end(): - * yield element_to_primitive( - */ - __pyx_cur_scope->__pyx_v_it = __pyx_cur_scope->__pyx_v_self->c_element.begin(); - - /* "csimdjson.pyx":220 - * def __iter__(self): - * cdef simd_array.iterator it = self.c_element.begin() - * while it != self.c_element.end(): # <<<<<<<<<<<<<< - * yield element_to_primitive( - * self.parser, - */ - while (1) { - __pyx_t_1 = (__pyx_cur_scope->__pyx_v_it != __pyx_cur_scope->__pyx_v_self->c_element.end()); - if (!__pyx_t_1) break; - - /* "csimdjson.pyx":222 - * while it != self.c_element.end(): - * yield element_to_primitive( - * self.parser, # <<<<<<<<<<<<<< - * dereference(it), - * False - */ - __pyx_t_2 = ((PyObject *)__pyx_cur_scope->__pyx_v_self->parser); - __Pyx_INCREF(__pyx_t_2); - - /* "csimdjson.pyx":221 - * cdef simd_array.iterator it = self.c_element.begin() - * while it != self.c_element.end(): - * yield element_to_primitive( # <<<<<<<<<<<<<< - * self.parser, - * dereference(it), - */ - __pyx_t_4.__pyx_n = 1; - __pyx_t_4.recursive = 0; - __pyx_t_3 = __pyx_f_9csimdjson_element_to_primitive(((struct __pyx_obj_9csimdjson_Parser *)__pyx_t_2), (*__pyx_cur_scope->__pyx_v_it), &__pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 221, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - __Pyx_Coroutine_ResetAndClearException(__pyx_generator); - /* return from generator, yielding value */ - __pyx_generator->resume_label = 1; - return __pyx_r; - __pyx_L6_resume_from_yield:; - if (unlikely(!__pyx_sent_value)) __PYX_ERR(0, 221, __pyx_L1_error) - - /* "csimdjson.pyx":226 - * False - * ) - * preincrement(it) # <<<<<<<<<<<<<< - * - * def at_pointer(self, json_pointer): - */ - (void)((++__pyx_cur_scope->__pyx_v_it)); - } - CYTHON_MAYBE_UNUSED_VAR(__pyx_cur_scope); - - /* "csimdjson.pyx":218 - * return self.c_element.size() - * - * def __iter__(self): # <<<<<<<<<<<<<< - * cdef simd_array.iterator it = self.c_element.begin() - * while it != self.c_element.end(): - */ - - /* function exit code */ - PyErr_SetNone(PyExc_StopIteration); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_Generator_Replace_StopIteration(0); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("__iter__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_L0:; - __Pyx_XDECREF(__pyx_r); __pyx_r = 0; - #if !CYTHON_USE_EXC_INFO_STACK - __Pyx_Coroutine_ResetAndClearException(__pyx_generator); - #endif - __pyx_generator->resume_label = -1; - __Pyx_Coroutine_clear((PyObject*)__pyx_generator); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":228 - * preincrement(it) - * - * def at_pointer(self, json_pointer): # <<<<<<<<<<<<<< - * """Get the value at the given JSON pointer.""" - * return element_to_primitive( - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_5Array_8at_pointer(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_5Array_7at_pointer, "Array.at_pointer(self, json_pointer)\nGet the value at the given JSON pointer."); -static PyMethodDef __pyx_mdef_9csimdjson_5Array_8at_pointer = {"at_pointer", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_5Array_8at_pointer, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_5Array_7at_pointer}; -static PyObject *__pyx_pw_9csimdjson_5Array_8at_pointer(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - PyObject *__pyx_v_json_pointer = 0; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("at_pointer (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(0, 228, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_json_pointer,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_json_pointer)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 228, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "at_pointer") < 0)) __PYX_ERR(0, 228, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 1)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - } - __pyx_v_json_pointer = values[0]; - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("at_pointer", 1, 1, 1, __pyx_nargs); __PYX_ERR(0, 228, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("csimdjson.Array.at_pointer", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_5Array_7at_pointer(((struct __pyx_obj_9csimdjson_Array *)__pyx_v_self), __pyx_v_json_pointer); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_5Array_7at_pointer(struct __pyx_obj_9csimdjson_Array *__pyx_v_self, PyObject *__pyx_v_json_pointer) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - char const *__pyx_t_3; - simdjson::dom::element __pyx_t_4; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("at_pointer", 0); - - /* "csimdjson.pyx":230 - * def at_pointer(self, json_pointer): - * """Get the value at the given JSON pointer.""" - * return element_to_primitive( # <<<<<<<<<<<<<< - * self.parser, - * self.c_element.at_pointer( - */ - __Pyx_XDECREF(__pyx_r); - - /* "csimdjson.pyx":231 - * """Get the value at the given JSON pointer.""" - * return element_to_primitive( - * self.parser, # <<<<<<<<<<<<<< - * self.c_element.at_pointer( - * str_as_bytes(json_pointer) - */ - __pyx_t_1 = ((PyObject *)__pyx_v_self->parser); - __Pyx_INCREF(__pyx_t_1); - - /* "csimdjson.pyx":233 - * self.parser, - * self.c_element.at_pointer( - * str_as_bytes(json_pointer) # <<<<<<<<<<<<<< - * ) - * ) - */ - __pyx_t_2 = __pyx_f_9csimdjson_str_as_bytes(__pyx_v_json_pointer); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 233, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - if (unlikely(__pyx_t_2 == Py_None)) { - PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found"); - __PYX_ERR(0, 233, __pyx_L1_error) - } - __pyx_t_3 = __Pyx_PyBytes_AsString(__pyx_t_2); if (unlikely((!__pyx_t_3) && PyErr_Occurred())) __PYX_ERR(0, 233, __pyx_L1_error) - - /* "csimdjson.pyx":232 - * return element_to_primitive( - * self.parser, - * self.c_element.at_pointer( # <<<<<<<<<<<<<< - * str_as_bytes(json_pointer) - * ) - */ - try { - __pyx_t_4 = __pyx_v_self->c_element.at_pointer(__pyx_t_3); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 232, __pyx_L1_error) - } - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "csimdjson.pyx":230 - * def at_pointer(self, json_pointer): - * """Get the value at the given JSON pointer.""" - * return element_to_primitive( # <<<<<<<<<<<<<< - * self.parser, - * self.c_element.at_pointer( - */ - __pyx_t_2 = __pyx_f_9csimdjson_element_to_primitive(((struct __pyx_obj_9csimdjson_Parser *)__pyx_t_1), __PYX_STD_MOVE_IF_SUPPORTED(__pyx_t_4), NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 230, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":228 - * preincrement(it) - * - * def at_pointer(self, json_pointer): # <<<<<<<<<<<<<< - * """Get the value at the given JSON pointer.""" - * return element_to_primitive( - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("csimdjson.Array.at_pointer", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":237 - * ) - * - * def as_list(self): # <<<<<<<<<<<<<< - * """ - * Convert this Array to a regular python list, recursively - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_5Array_10as_list(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_5Array_9as_list, "Array.as_list(self)\n\n Convert this Array to a regular python list, recursively\n converting any objects/lists it finds.\n "); -static PyMethodDef __pyx_mdef_9csimdjson_5Array_10as_list = {"as_list", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_5Array_10as_list, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_5Array_9as_list}; -static PyObject *__pyx_pw_9csimdjson_5Array_10as_list(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("as_list (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(0, 237, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("as_list", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "as_list", 0))) return NULL; - goto __pyx_L4_argument_unpacking_done; - goto __pyx_L3_error; - __pyx_L3_error:; - __Pyx_AddTraceback("csimdjson.Array.as_list", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_5Array_9as_list(((struct __pyx_obj_9csimdjson_Array *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_5Array_9as_list(struct __pyx_obj_9csimdjson_Array *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("as_list", 0); - - /* "csimdjson.pyx":242 - * converting any objects/lists it finds. - * """ - * return array_to_list(self.parser, self.c_element, True) # <<<<<<<<<<<<<< - * - * def as_buffer(self, *, of_type): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = ((PyObject *)__pyx_v_self->parser); - __Pyx_INCREF(__pyx_t_1); - __pyx_t_2 = __pyx_f_9csimdjson_array_to_list(((struct __pyx_obj_9csimdjson_Parser *)__pyx_t_1), __pyx_v_self->c_element, 1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 242, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":237 - * ) - * - * def as_list(self): # <<<<<<<<<<<<<< - * """ - * Convert this Array to a regular python list, recursively - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("csimdjson.Array.as_list", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":244 - * return array_to_list(self.parser, self.c_element, True) - * - * def as_buffer(self, *, of_type): # <<<<<<<<<<<<<< - * """ - * **Copies** the contents of a **homogeneous** array to an - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_5Array_12as_buffer(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_5Array_11as_buffer, "Array.as_buffer(self, *, of_type)\n\n **Copies** the contents of a **homogeneous** array to an\n object that can be used as a `buffer`. This means it can be\n used as input for `numpy.frombuffer`, `bytearray`,\n `memoryview`, etc.\n\n When n-dimensional arrays are encountered, this method will recursively\n flatten them.\n\n .. note::\n\n The object returned by this method contains a *copy* of the Array's\n data. Thus, it's safe to use even after the Array or Parser are\n destroyed or reused.\n\n :param of_type: One of 'd' (double), 'i' (signed 64-bit integer) or 'u'\n (unsigned 64-bit integer).\n "); -static PyMethodDef __pyx_mdef_9csimdjson_5Array_12as_buffer = {"as_buffer", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_5Array_12as_buffer, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_5Array_11as_buffer}; -static PyObject *__pyx_pw_9csimdjson_5Array_12as_buffer(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - PyObject *__pyx_v_of_type = 0; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("as_buffer (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(0, 244, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_of_type,0}; - if (likely(__pyx_kwds)) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_of_type)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 244, __pyx_L3_error) - else { - __Pyx_RaiseKeywordRequired("as_buffer", __pyx_n_s_of_type); __PYX_ERR(0, 244, __pyx_L3_error) - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, 0, "as_buffer") < 0)) __PYX_ERR(0, 244, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 0)) { - goto __pyx_L5_argtuple_error; - } else { - __Pyx_RaiseKeywordRequired("as_buffer", __pyx_n_s_of_type); __PYX_ERR(0, 244, __pyx_L3_error) - } - __pyx_v_of_type = values[0]; - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("as_buffer", 1, 0, 0, __pyx_nargs); __PYX_ERR(0, 244, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("csimdjson.Array.as_buffer", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_5Array_11as_buffer(((struct __pyx_obj_9csimdjson_Array *)__pyx_v_self), __pyx_v_of_type); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_5Array_11as_buffer(struct __pyx_obj_9csimdjson_Array *__pyx_v_self, PyObject *__pyx_v_of_type) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("as_buffer", 0); - - /* "csimdjson.pyx":263 - * (unsigned 64-bit integer). - * """ - * return ArrayBuffer.from_element(self.c_element, of_type) # <<<<<<<<<<<<<< - * - * @property - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __pyx_f_9csimdjson_11ArrayBuffer_from_element(__pyx_v_self->c_element, __pyx_v_of_type); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 263, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_r = __pyx_t_1; - __pyx_t_1 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":244 - * return array_to_list(self.parser, self.c_element, True) - * - * def as_buffer(self, *, of_type): # <<<<<<<<<<<<<< - * """ - * **Copies** the contents of a **homogeneous** array to an - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("csimdjson.Array.as_buffer", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":265 - * return ArrayBuffer.from_element(self.c_element, of_type) - * - * @property # <<<<<<<<<<<<<< - * def mini(self): - * """ - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_5Array_4mini_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_9csimdjson_5Array_4mini_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_9csimdjson_5Array_4mini___get__(((struct __pyx_obj_9csimdjson_Array *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_5Array_4mini___get__(struct __pyx_obj_9csimdjson_Array *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - std::string __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 0); - - /* "csimdjson.pyx":272 - * :rtype: bytes - * """ - * return minify(self.c_element) # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - try { - __pyx_t_1 = simdjson::minify(__pyx_v_self->c_element); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 272, __pyx_L1_error) - } - __pyx_t_2 = __pyx_convert_PyBytes_string_to_py_std__in_string(__pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 272, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(((PyObject*)__pyx_t_2)); - __pyx_r = __pyx_t_2; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":265 - * return ArrayBuffer.from_element(self.c_element, of_type) - * - * @property # <<<<<<<<<<<<<< - * def mini(self): - * """ - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("csimdjson.Array.mini.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":165 - * objects. - * """ - * cdef readonly Parser parser # <<<<<<<<<<<<<< - * cdef simd_array c_element - * cdef shared_ptr[simd_parser] c_parser - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_5Array_6parser_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_9csimdjson_5Array_6parser_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_9csimdjson_5Array_6parser___get__(((struct __pyx_obj_9csimdjson_Array *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_5Array_6parser___get__(struct __pyx_obj_9csimdjson_Array *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__", 0); - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF((PyObject *)__pyx_v_self->parser); - __pyx_r = ((PyObject *)__pyx_v_self->parser); - goto __pyx_L0; - - /* function exit code */ - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - * def __setstate_cython__(self, __pyx_state): - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_5Array_14__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_5Array_13__reduce_cython__, "Array.__reduce_cython__(self)"); -static PyMethodDef __pyx_mdef_9csimdjson_5Array_14__reduce_cython__ = {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_5Array_14__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_5Array_13__reduce_cython__}; -static PyObject *__pyx_pw_9csimdjson_5Array_14__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__reduce_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 1, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("__reduce_cython__", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__reduce_cython__", 0))) return NULL; - goto __pyx_L4_argument_unpacking_done; - goto __pyx_L3_error; - __pyx_L3_error:; - __Pyx_AddTraceback("csimdjson.Array.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_5Array_13__reduce_cython__(((struct __pyx_obj_9csimdjson_Array *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_5Array_13__reduce_cython__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_Array *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__reduce_cython__", 0); - - /* "(tree fragment)":2 - * def __reduce_cython__(self): - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" # <<<<<<<<<<<<<< - * def __setstate_cython__(self, __pyx_state): - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - */ - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_self_c_element_self_c_parser_can, 0, 0); - __PYX_ERR(1, 2, __pyx_L1_error) - - /* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - * def __setstate_cython__(self, __pyx_state): - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("csimdjson.Array.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_5Array_16__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_5Array_15__setstate_cython__, "Array.__setstate_cython__(self, __pyx_state)"); -static PyMethodDef __pyx_mdef_9csimdjson_5Array_16__setstate_cython__ = {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_5Array_16__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_5Array_15__setstate_cython__}; -static PyObject *__pyx_pw_9csimdjson_5Array_16__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - CYTHON_UNUSED PyObject *__pyx_v___pyx_state = 0; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__setstate_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 3, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_pyx_state,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pyx_state)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 3, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__setstate_cython__") < 0)) __PYX_ERR(1, 3, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 1)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - } - __pyx_v___pyx_state = values[0]; - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__setstate_cython__", 1, 1, 1, __pyx_nargs); __PYX_ERR(1, 3, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("csimdjson.Array.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_5Array_15__setstate_cython__(((struct __pyx_obj_9csimdjson_Array *)__pyx_v_self), __pyx_v___pyx_state); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_5Array_15__setstate_cython__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_Array *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__setstate_cython__", 0); - - /* "(tree fragment)":4 - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - * def __setstate_cython__(self, __pyx_state): - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" # <<<<<<<<<<<<<< - */ - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_self_c_element_self_c_parser_can, 0, 0); - __PYX_ERR(1, 4, __pyx_L1_error) - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("csimdjson.Array.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":288 - * - * @staticmethod - * cdef inline from_element(Parser parser, simd_element src): # <<<<<<<<<<<<<< - * cdef Object self = Object.__new__(Object) - * self.parser = parser - */ - -static CYTHON_INLINE PyObject *__pyx_f_9csimdjson_6Object_from_element(struct __pyx_obj_9csimdjson_Parser *__pyx_v_parser, simdjson::dom::element __pyx_v_src) { - struct __pyx_obj_9csimdjson_Object *__pyx_v_self = 0; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - simdjson::dom::object __pyx_t_2; - std::shared_ptr __pyx_t_3; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("from_element", 0); - - /* "csimdjson.pyx":289 - * @staticmethod - * cdef inline from_element(Parser parser, simd_element src): - * cdef Object self = Object.__new__(Object) # <<<<<<<<<<<<<< - * self.parser = parser - * self.c_element = src.get_object() - */ - __pyx_t_1 = ((PyObject *)__pyx_tp_new_9csimdjson_Object(((PyTypeObject *)__pyx_ptype_9csimdjson_Object), __pyx_empty_tuple, NULL)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 289, __pyx_L1_error) - __Pyx_GOTREF((PyObject *)__pyx_t_1); - __pyx_v_self = ((struct __pyx_obj_9csimdjson_Object *)__pyx_t_1); - __pyx_t_1 = 0; - - /* "csimdjson.pyx":290 - * cdef inline from_element(Parser parser, simd_element src): - * cdef Object self = Object.__new__(Object) - * self.parser = parser # <<<<<<<<<<<<<< - * self.c_element = src.get_object() - * self.c_parser = parser.c_parser - */ - __Pyx_INCREF((PyObject *)__pyx_v_parser); - __Pyx_GIVEREF((PyObject *)__pyx_v_parser); - __Pyx_GOTREF((PyObject *)__pyx_v_self->parser); - __Pyx_DECREF((PyObject *)__pyx_v_self->parser); - __pyx_v_self->parser = __pyx_v_parser; - - /* "csimdjson.pyx":291 - * cdef Object self = Object.__new__(Object) - * self.parser = parser - * self.c_element = src.get_object() # <<<<<<<<<<<<<< - * self.c_parser = parser.c_parser - * return self - */ - try { - __pyx_t_2 = __pyx_v_src.get_object(); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 291, __pyx_L1_error) - } - __pyx_v_self->c_element = __PYX_STD_MOVE_IF_SUPPORTED(__pyx_t_2); - - /* "csimdjson.pyx":292 - * self.parser = parser - * self.c_element = src.get_object() - * self.c_parser = parser.c_parser # <<<<<<<<<<<<<< - * return self - * - */ - __pyx_t_3 = __pyx_v_parser->c_parser; - __pyx_v_self->c_parser = __PYX_STD_MOVE_IF_SUPPORTED(__pyx_t_3); - - /* "csimdjson.pyx":293 - * self.c_element = src.get_object() - * self.c_parser = parser.c_parser - * return self # <<<<<<<<<<<<<< - * - * def __getitem__(self, key): - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF((PyObject *)__pyx_v_self); - __pyx_r = ((PyObject *)__pyx_v_self); - goto __pyx_L0; - - /* "csimdjson.pyx":288 - * - * @staticmethod - * cdef inline from_element(Parser parser, simd_element src): # <<<<<<<<<<<<<< - * cdef Object self = Object.__new__(Object) - * self.parser = parser - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_AddTraceback("csimdjson.Object.from_element", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = 0; - __pyx_L0:; - __Pyx_XDECREF((PyObject *)__pyx_v_self); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":295 - * return self - * - * def __getitem__(self, key): # <<<<<<<<<<<<<< - * return element_to_primitive( - * self.parser, - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_6Object_1__getitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_key); /*proto*/ -static PyObject *__pyx_pw_9csimdjson_6Object_1__getitem__(PyObject *__pyx_v_self, PyObject *__pyx_v_key) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__getitem__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_9csimdjson_6Object___getitem__(((struct __pyx_obj_9csimdjson_Object *)__pyx_v_self), ((PyObject *)__pyx_v_key)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_6Object___getitem__(struct __pyx_obj_9csimdjson_Object *__pyx_v_self, PyObject *__pyx_v_key) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - char const *__pyx_t_3; - simdjson::dom::element __pyx_t_4; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__getitem__", 0); - - /* "csimdjson.pyx":296 - * - * def __getitem__(self, key): - * return element_to_primitive( # <<<<<<<<<<<<<< - * self.parser, - * self.c_element[str_as_bytes(key)] - */ - __Pyx_XDECREF(__pyx_r); - - /* "csimdjson.pyx":297 - * def __getitem__(self, key): - * return element_to_primitive( - * self.parser, # <<<<<<<<<<<<<< - * self.c_element[str_as_bytes(key)] - * ) - */ - __pyx_t_1 = ((PyObject *)__pyx_v_self->parser); - __Pyx_INCREF(__pyx_t_1); - - /* "csimdjson.pyx":298 - * return element_to_primitive( - * self.parser, - * self.c_element[str_as_bytes(key)] # <<<<<<<<<<<<<< - * ) - * - */ - __pyx_t_2 = __pyx_f_9csimdjson_str_as_bytes(__pyx_v_key); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 298, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - if (unlikely(__pyx_t_2 == Py_None)) { - PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found"); - __PYX_ERR(0, 298, __pyx_L1_error) - } - __pyx_t_3 = __Pyx_PyBytes_AsString(__pyx_t_2); if (unlikely((!__pyx_t_3) && PyErr_Occurred())) __PYX_ERR(0, 298, __pyx_L1_error) - try { - __pyx_t_4 = __pyx_v_self->c_element[__pyx_t_3]; - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 298, __pyx_L1_error) - } - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "csimdjson.pyx":296 - * - * def __getitem__(self, key): - * return element_to_primitive( # <<<<<<<<<<<<<< - * self.parser, - * self.c_element[str_as_bytes(key)] - */ - __pyx_t_2 = __pyx_f_9csimdjson_element_to_primitive(((struct __pyx_obj_9csimdjson_Parser *)__pyx_t_1), __PYX_STD_MOVE_IF_SUPPORTED(__pyx_t_4), NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 296, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":295 - * return self - * - * def __getitem__(self, key): # <<<<<<<<<<<<<< - * return element_to_primitive( - * self.parser, - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("csimdjson.Object.__getitem__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":301 - * ) - * - * def get(self, key, default=None): # <<<<<<<<<<<<<< - * """ - * Return the value of `key`, or `default` if the key does - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_6Object_3get(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_6Object_2get, "Object.get(self, key, default=None)\n\n Return the value of `key`, or `default` if the key does\n not exist.\n "); -static PyMethodDef __pyx_mdef_9csimdjson_6Object_3get = {"get", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Object_3get, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Object_2get}; -static PyObject *__pyx_pw_9csimdjson_6Object_3get(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - PyObject *__pyx_v_key = 0; - PyObject *__pyx_v_default = 0; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[2] = {0,0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("get (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(0, 301, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_key,&__pyx_n_s_default,0}; - values[1] = __Pyx_Arg_NewRef_FASTCALL(((PyObject *)Py_None)); - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); - CYTHON_FALLTHROUGH; - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_key)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 301, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - CYTHON_FALLTHROUGH; - case 1: - if (kw_args > 0) { - PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_default); - if (value) { values[1] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 301, __pyx_L3_error) - } - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "get") < 0)) __PYX_ERR(0, 301, __pyx_L3_error) - } - } else { - switch (__pyx_nargs) { - case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); - CYTHON_FALLTHROUGH; - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - __pyx_v_key = values[0]; - __pyx_v_default = values[1]; - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("get", 0, 1, 2, __pyx_nargs); __PYX_ERR(0, 301, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("csimdjson.Object.get", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_6Object_2get(((struct __pyx_obj_9csimdjson_Object *)__pyx_v_self), __pyx_v_key, __pyx_v_default); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_6Object_2get(struct __pyx_obj_9csimdjson_Object *__pyx_v_self, PyObject *__pyx_v_key, PyObject *__pyx_v_default) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - int __pyx_t_5; - PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("get", 0); - - /* "csimdjson.pyx":306 - * not exist. - * """ - * try: # <<<<<<<<<<<<<< - * return self[key] - * except KeyError: - */ - { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); - __Pyx_XGOTREF(__pyx_t_1); - __Pyx_XGOTREF(__pyx_t_2); - __Pyx_XGOTREF(__pyx_t_3); - /*try:*/ { - - /* "csimdjson.pyx":307 - * """ - * try: - * return self[key] # <<<<<<<<<<<<<< - * except KeyError: - * return default - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_4 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_self), __pyx_v_key); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 307, __pyx_L3_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_r = __pyx_t_4; - __pyx_t_4 = 0; - goto __pyx_L7_try_return; - - /* "csimdjson.pyx":306 - * not exist. - * """ - * try: # <<<<<<<<<<<<<< - * return self[key] - * except KeyError: - */ - } - __pyx_L3_error:; - __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - - /* "csimdjson.pyx":308 - * try: - * return self[key] - * except KeyError: # <<<<<<<<<<<<<< - * return default - * - */ - __pyx_t_5 = __Pyx_PyErr_ExceptionMatches(__pyx_builtin_KeyError); - if (__pyx_t_5) { - __Pyx_AddTraceback("csimdjson.Object.get", __pyx_clineno, __pyx_lineno, __pyx_filename); - if (__Pyx_GetException(&__pyx_t_4, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(0, 308, __pyx_L5_except_error) - __Pyx_XGOTREF(__pyx_t_4); - __Pyx_XGOTREF(__pyx_t_6); - __Pyx_XGOTREF(__pyx_t_7); - - /* "csimdjson.pyx":309 - * return self[key] - * except KeyError: - * return default # <<<<<<<<<<<<<< - * - * def __len__(self): - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_v_default); - __pyx_r = __pyx_v_default; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - goto __pyx_L6_except_return; - } - goto __pyx_L5_except_error; - - /* "csimdjson.pyx":306 - * not exist. - * """ - * try: # <<<<<<<<<<<<<< - * return self[key] - * except KeyError: - */ - __pyx_L5_except_error:; - __Pyx_XGIVEREF(__pyx_t_1); - __Pyx_XGIVEREF(__pyx_t_2); - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); - goto __pyx_L1_error; - __pyx_L7_try_return:; - __Pyx_XGIVEREF(__pyx_t_1); - __Pyx_XGIVEREF(__pyx_t_2); - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); - goto __pyx_L0; - __pyx_L6_except_return:; - __Pyx_XGIVEREF(__pyx_t_1); - __Pyx_XGIVEREF(__pyx_t_2); - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); - goto __pyx_L0; - } - - /* "csimdjson.pyx":301 - * ) - * - * def get(self, key, default=None): # <<<<<<<<<<<<<< - * """ - * Return the value of `key`, or `default` if the key does - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_AddTraceback("csimdjson.Object.get", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":311 - * return default - * - * def __len__(self): # <<<<<<<<<<<<<< - * return self.c_element.size() - * - */ - -/* Python wrapper */ -static Py_ssize_t __pyx_pw_9csimdjson_6Object_5__len__(PyObject *__pyx_v_self); /*proto*/ -static Py_ssize_t __pyx_pw_9csimdjson_6Object_5__len__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - Py_ssize_t __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__len__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_9csimdjson_6Object_4__len__(((struct __pyx_obj_9csimdjson_Object *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static Py_ssize_t __pyx_pf_9csimdjson_6Object_4__len__(struct __pyx_obj_9csimdjson_Object *__pyx_v_self) { - Py_ssize_t __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__len__", 0); - - /* "csimdjson.pyx":312 - * - * def __len__(self): - * return self.c_element.size() # <<<<<<<<<<<<<< - * - * def __contains__(self, key): - */ - __pyx_r = __pyx_v_self->c_element.size(); - goto __pyx_L0; - - /* "csimdjson.pyx":311 - * return default - * - * def __len__(self): # <<<<<<<<<<<<<< - * return self.c_element.size() - * - */ - - /* function exit code */ - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":314 - * return self.c_element.size() - * - * def __contains__(self, key): # <<<<<<<<<<<<<< - * try: - * self.c_element[str_as_bytes(key)] - */ - -/* Python wrapper */ -static int __pyx_pw_9csimdjson_6Object_7__contains__(PyObject *__pyx_v_self, PyObject *__pyx_v_key); /*proto*/ -static int __pyx_pw_9csimdjson_6Object_7__contains__(PyObject *__pyx_v_self, PyObject *__pyx_v_key) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__contains__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_9csimdjson_6Object_6__contains__(((struct __pyx_obj_9csimdjson_Object *)__pyx_v_self), ((PyObject *)__pyx_v_key)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_9csimdjson_6Object_6__contains__(struct __pyx_obj_9csimdjson_Object *__pyx_v_self, PyObject *__pyx_v_key) { - int __pyx_r; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - char const *__pyx_t_5; - simdjson::dom::element __pyx_t_6; - int __pyx_t_7; - PyObject *__pyx_t_8 = NULL; - PyObject *__pyx_t_9 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__contains__", 0); - - /* "csimdjson.pyx":315 - * - * def __contains__(self, key): - * try: # <<<<<<<<<<<<<< - * self.c_element[str_as_bytes(key)] - * except KeyError: - */ - { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); - __Pyx_XGOTREF(__pyx_t_1); - __Pyx_XGOTREF(__pyx_t_2); - __Pyx_XGOTREF(__pyx_t_3); - /*try:*/ { - - /* "csimdjson.pyx":316 - * def __contains__(self, key): - * try: - * self.c_element[str_as_bytes(key)] # <<<<<<<<<<<<<< - * except KeyError: - * return False - */ - __pyx_t_4 = __pyx_f_9csimdjson_str_as_bytes(__pyx_v_key); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 316, __pyx_L3_error) - __Pyx_GOTREF(__pyx_t_4); - if (unlikely(__pyx_t_4 == Py_None)) { - PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found"); - __PYX_ERR(0, 316, __pyx_L3_error) - } - __pyx_t_5 = __Pyx_PyBytes_AsString(__pyx_t_4); if (unlikely((!__pyx_t_5) && PyErr_Occurred())) __PYX_ERR(0, 316, __pyx_L3_error) - try { - __pyx_t_6 = __pyx_v_self->c_element[__pyx_t_5]; - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 316, __pyx_L3_error) - } - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - - /* "csimdjson.pyx":315 - * - * def __contains__(self, key): - * try: # <<<<<<<<<<<<<< - * self.c_element[str_as_bytes(key)] - * except KeyError: - */ - } - __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - goto __pyx_L8_try_end; - __pyx_L3_error:; - __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - - /* "csimdjson.pyx":317 - * try: - * self.c_element[str_as_bytes(key)] - * except KeyError: # <<<<<<<<<<<<<< - * return False - * return True - */ - __pyx_t_7 = __Pyx_PyErr_ExceptionMatches(__pyx_builtin_KeyError); - if (__pyx_t_7) { - __Pyx_AddTraceback("csimdjson.Object.__contains__", __pyx_clineno, __pyx_lineno, __pyx_filename); - if (__Pyx_GetException(&__pyx_t_4, &__pyx_t_8, &__pyx_t_9) < 0) __PYX_ERR(0, 317, __pyx_L5_except_error) - __Pyx_XGOTREF(__pyx_t_4); - __Pyx_XGOTREF(__pyx_t_8); - __Pyx_XGOTREF(__pyx_t_9); - - /* "csimdjson.pyx":318 - * self.c_element[str_as_bytes(key)] - * except KeyError: - * return False # <<<<<<<<<<<<<< - * return True - * - */ - __pyx_r = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - goto __pyx_L6_except_return; - } - goto __pyx_L5_except_error; - - /* "csimdjson.pyx":315 - * - * def __contains__(self, key): - * try: # <<<<<<<<<<<<<< - * self.c_element[str_as_bytes(key)] - * except KeyError: - */ - __pyx_L5_except_error:; - __Pyx_XGIVEREF(__pyx_t_1); - __Pyx_XGIVEREF(__pyx_t_2); - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); - goto __pyx_L1_error; - __pyx_L6_except_return:; - __Pyx_XGIVEREF(__pyx_t_1); - __Pyx_XGIVEREF(__pyx_t_2); - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); - goto __pyx_L0; - __pyx_L8_try_end:; - } - - /* "csimdjson.pyx":319 - * except KeyError: - * return False - * return True # <<<<<<<<<<<<<< - * - * def __iter__(self): - */ - __pyx_r = 1; - goto __pyx_L0; - - /* "csimdjson.pyx":314 - * return self.c_element.size() - * - * def __contains__(self, key): # <<<<<<<<<<<<<< - * try: - * self.c_element[str_as_bytes(key)] - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_8); - __Pyx_XDECREF(__pyx_t_9); - __Pyx_AddTraceback("csimdjson.Object.__contains__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} -static PyObject *__pyx_gb_9csimdjson_6Object_10generator1(__pyx_CoroutineObject *__pyx_generator, CYTHON_UNUSED PyThreadState *__pyx_tstate, PyObject *__pyx_sent_value); /* proto */ - -/* "csimdjson.pyx":321 - * return True - * - * def __iter__(self): # <<<<<<<<<<<<<< - * """ - * Returns an iterator over all keys in this `Object`. - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_6Object_9__iter__(PyObject *__pyx_v_self); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_6Object_8__iter__, "\n Returns an iterator over all keys in this `Object`.\n "); -#if CYTHON_UPDATE_DESCRIPTOR_DOC -struct wrapperbase __pyx_wrapperbase_9csimdjson_6Object_8__iter__; -#endif -static PyObject *__pyx_pw_9csimdjson_6Object_9__iter__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__iter__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_9csimdjson_6Object_8__iter__(((struct __pyx_obj_9csimdjson_Object *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_6Object_8__iter__(struct __pyx_obj_9csimdjson_Object *__pyx_v_self) { - struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__ *__pyx_cur_scope; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__iter__", 0); - __pyx_cur_scope = (struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__ *)__pyx_tp_new_9csimdjson___pyx_scope_struct_1___iter__(__pyx_ptype_9csimdjson___pyx_scope_struct_1___iter__, __pyx_empty_tuple, NULL); - if (unlikely(!__pyx_cur_scope)) { - __pyx_cur_scope = ((struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__ *)Py_None); - __Pyx_INCREF(Py_None); - __PYX_ERR(0, 321, __pyx_L1_error) - } else { - __Pyx_GOTREF((PyObject *)__pyx_cur_scope); - } - __pyx_cur_scope->__pyx_v_self = __pyx_v_self; - __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self); - __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); - { - __pyx_CoroutineObject *gen = __Pyx_Generator_New((__pyx_coroutine_body_t) __pyx_gb_9csimdjson_6Object_10generator1, NULL, (PyObject *) __pyx_cur_scope, __pyx_n_s_iter, __pyx_n_s_Object___iter, __pyx_n_s_csimdjson); if (unlikely(!gen)) __PYX_ERR(0, 321, __pyx_L1_error) - __Pyx_DECREF(__pyx_cur_scope); - __Pyx_RefNannyFinishContext(); - return (PyObject *) gen; - } - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("csimdjson.Object.__iter__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_DECREF((PyObject *)__pyx_cur_scope); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_gb_9csimdjson_6Object_10generator1(__pyx_CoroutineObject *__pyx_generator, CYTHON_UNUSED PyThreadState *__pyx_tstate, PyObject *__pyx_sent_value) /* generator body */ -{ - struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__ *__pyx_cur_scope = ((struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__ *)__pyx_generator->closure); - PyObject *__pyx_r = NULL; - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__iter__", 0); - switch (__pyx_generator->resume_label) { - case 0: goto __pyx_L3_first_run; - case 1: goto __pyx_L6_resume_from_yield; - default: /* CPython raises the right error here */ - __Pyx_RefNannyFinishContext(); - return NULL; - } - __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) __PYX_ERR(0, 321, __pyx_L1_error) - - /* "csimdjson.pyx":328 - * size_t size - * const char *data - * simd_object.iterator it = self.c_element.begin() # <<<<<<<<<<<<<< - * - * while it != self.c_element.end(): - */ - __pyx_cur_scope->__pyx_v_it = __pyx_cur_scope->__pyx_v_self->c_element.begin(); - - /* "csimdjson.pyx":330 - * simd_object.iterator it = self.c_element.begin() - * - * while it != self.c_element.end(): # <<<<<<<<<<<<<< - * data = it.key_c_str() - * size = it.key_length() - */ - while (1) { - __pyx_t_1 = (__pyx_cur_scope->__pyx_v_it != __pyx_cur_scope->__pyx_v_self->c_element.end()); - if (!__pyx_t_1) break; - - /* "csimdjson.pyx":331 - * - * while it != self.c_element.end(): - * data = it.key_c_str() # <<<<<<<<<<<<<< - * size = it.key_length() - * yield data[:size] - */ - __pyx_cur_scope->__pyx_v_data = __pyx_cur_scope->__pyx_v_it.key_c_str(); - - /* "csimdjson.pyx":332 - * while it != self.c_element.end(): - * data = it.key_c_str() - * size = it.key_length() # <<<<<<<<<<<<<< - * yield data[:size] - * preincrement(it) - */ - __pyx_cur_scope->__pyx_v_size = __pyx_cur_scope->__pyx_v_it.key_length(); - - /* "csimdjson.pyx":333 - * data = it.key_c_str() - * size = it.key_length() - * yield data[:size] # <<<<<<<<<<<<<< - * preincrement(it) - * - */ - __pyx_t_2 = __Pyx_PyUnicode_FromStringAndSize(__pyx_cur_scope->__pyx_v_data + 0, __pyx_cur_scope->__pyx_v_size - 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 333, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - __Pyx_Coroutine_ResetAndClearException(__pyx_generator); - /* return from generator, yielding value */ - __pyx_generator->resume_label = 1; - return __pyx_r; - __pyx_L6_resume_from_yield:; - if (unlikely(!__pyx_sent_value)) __PYX_ERR(0, 333, __pyx_L1_error) - - /* "csimdjson.pyx":334 - * size = it.key_length() - * yield data[:size] - * preincrement(it) # <<<<<<<<<<<<<< - * - * keys = __iter__ - */ - (void)((++__pyx_cur_scope->__pyx_v_it)); - } - CYTHON_MAYBE_UNUSED_VAR(__pyx_cur_scope); - - /* "csimdjson.pyx":321 - * return True - * - * def __iter__(self): # <<<<<<<<<<<<<< - * """ - * Returns an iterator over all keys in this `Object`. - */ - - /* function exit code */ - PyErr_SetNone(PyExc_StopIteration); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_Generator_Replace_StopIteration(0); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("__iter__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_L0:; - __Pyx_XDECREF(__pyx_r); __pyx_r = 0; - #if !CYTHON_USE_EXC_INFO_STACK - __Pyx_Coroutine_ResetAndClearException(__pyx_generator); - #endif - __pyx_generator->resume_label = -1; - __Pyx_Coroutine_clear((PyObject*)__pyx_generator); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} -static PyObject *__pyx_gb_9csimdjson_6Object_13generator2(__pyx_CoroutineObject *__pyx_generator, CYTHON_UNUSED PyThreadState *__pyx_tstate, PyObject *__pyx_sent_value); /* proto */ - -/* "csimdjson.pyx":338 - * keys = __iter__ - * - * def values(self): # <<<<<<<<<<<<<< - * """ - * Returns an iterator over of all values in this `Object`. - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_6Object_12values(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_6Object_11values, "Object.values(self)\n\n Returns an iterator over of all values in this `Object`.\n "); -static PyMethodDef __pyx_mdef_9csimdjson_6Object_12values = {"values", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Object_12values, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Object_11values}; -static PyObject *__pyx_pw_9csimdjson_6Object_12values(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("values (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(0, 338, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("values", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "values", 0))) return NULL; - goto __pyx_L4_argument_unpacking_done; - goto __pyx_L3_error; - __pyx_L3_error:; - __Pyx_AddTraceback("csimdjson.Object.values", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_6Object_11values(((struct __pyx_obj_9csimdjson_Object *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_6Object_11values(struct __pyx_obj_9csimdjson_Object *__pyx_v_self) { - struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values *__pyx_cur_scope; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("values", 0); - __pyx_cur_scope = (struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values *)__pyx_tp_new_9csimdjson___pyx_scope_struct_2_values(__pyx_ptype_9csimdjson___pyx_scope_struct_2_values, __pyx_empty_tuple, NULL); - if (unlikely(!__pyx_cur_scope)) { - __pyx_cur_scope = ((struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values *)Py_None); - __Pyx_INCREF(Py_None); - __PYX_ERR(0, 338, __pyx_L1_error) - } else { - __Pyx_GOTREF((PyObject *)__pyx_cur_scope); - } - __pyx_cur_scope->__pyx_v_self = __pyx_v_self; - __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self); - __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); - { - __pyx_CoroutineObject *gen = __Pyx_Generator_New((__pyx_coroutine_body_t) __pyx_gb_9csimdjson_6Object_13generator2, __pyx_codeobj__11, (PyObject *) __pyx_cur_scope, __pyx_n_s_values, __pyx_n_s_Object_values, __pyx_n_s_csimdjson); if (unlikely(!gen)) __PYX_ERR(0, 338, __pyx_L1_error) - __Pyx_DECREF(__pyx_cur_scope); - __Pyx_RefNannyFinishContext(); - return (PyObject *) gen; - } - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("csimdjson.Object.values", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_DECREF((PyObject *)__pyx_cur_scope); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_gb_9csimdjson_6Object_13generator2(__pyx_CoroutineObject *__pyx_generator, CYTHON_UNUSED PyThreadState *__pyx_tstate, PyObject *__pyx_sent_value) /* generator body */ -{ - struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values *__pyx_cur_scope = ((struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values *)__pyx_generator->closure); - PyObject *__pyx_r = NULL; - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - struct __pyx_opt_args_9csimdjson_element_to_primitive __pyx_t_4; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("values", 0); - switch (__pyx_generator->resume_label) { - case 0: goto __pyx_L3_first_run; - case 1: goto __pyx_L6_resume_from_yield; - default: /* CPython raises the right error here */ - __Pyx_RefNannyFinishContext(); - return NULL; - } - __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) __PYX_ERR(0, 338, __pyx_L1_error) - - /* "csimdjson.pyx":342 - * Returns an iterator over of all values in this `Object`. - * """ - * cdef simd_object.iterator it = self.c_element.begin() # <<<<<<<<<<<<<< - * while it != self.c_element.end(): - * yield element_to_primitive(self.parser, it.value(), True) - */ - __pyx_cur_scope->__pyx_v_it = __pyx_cur_scope->__pyx_v_self->c_element.begin(); - - /* "csimdjson.pyx":343 - * """ - * cdef simd_object.iterator it = self.c_element.begin() - * while it != self.c_element.end(): # <<<<<<<<<<<<<< - * yield element_to_primitive(self.parser, it.value(), True) - * preincrement(it) - */ - while (1) { - __pyx_t_1 = (__pyx_cur_scope->__pyx_v_it != __pyx_cur_scope->__pyx_v_self->c_element.end()); - if (!__pyx_t_1) break; - - /* "csimdjson.pyx":344 - * cdef simd_object.iterator it = self.c_element.begin() - * while it != self.c_element.end(): - * yield element_to_primitive(self.parser, it.value(), True) # <<<<<<<<<<<<<< - * preincrement(it) - * - */ - __pyx_t_2 = ((PyObject *)__pyx_cur_scope->__pyx_v_self->parser); - __Pyx_INCREF(__pyx_t_2); - __pyx_t_4.__pyx_n = 1; - __pyx_t_4.recursive = 1; - __pyx_t_3 = __pyx_f_9csimdjson_element_to_primitive(((struct __pyx_obj_9csimdjson_Parser *)__pyx_t_2), __pyx_cur_scope->__pyx_v_it.value(), &__pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 344, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - __Pyx_Coroutine_ResetAndClearException(__pyx_generator); - /* return from generator, yielding value */ - __pyx_generator->resume_label = 1; - return __pyx_r; - __pyx_L6_resume_from_yield:; - if (unlikely(!__pyx_sent_value)) __PYX_ERR(0, 344, __pyx_L1_error) - - /* "csimdjson.pyx":345 - * while it != self.c_element.end(): - * yield element_to_primitive(self.parser, it.value(), True) - * preincrement(it) # <<<<<<<<<<<<<< - * - * def items(self): - */ - (void)((++__pyx_cur_scope->__pyx_v_it)); - } - CYTHON_MAYBE_UNUSED_VAR(__pyx_cur_scope); - - /* "csimdjson.pyx":338 - * keys = __iter__ - * - * def values(self): # <<<<<<<<<<<<<< - * """ - * Returns an iterator over of all values in this `Object`. - */ - - /* function exit code */ - PyErr_SetNone(PyExc_StopIteration); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_Generator_Replace_StopIteration(0); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("values", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_L0:; - __Pyx_XDECREF(__pyx_r); __pyx_r = 0; - #if !CYTHON_USE_EXC_INFO_STACK - __Pyx_Coroutine_ResetAndClearException(__pyx_generator); - #endif - __pyx_generator->resume_label = -1; - __Pyx_Coroutine_clear((PyObject*)__pyx_generator); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} -static PyObject *__pyx_gb_9csimdjson_6Object_16generator3(__pyx_CoroutineObject *__pyx_generator, CYTHON_UNUSED PyThreadState *__pyx_tstate, PyObject *__pyx_sent_value); /* proto */ - -/* "csimdjson.pyx":347 - * preincrement(it) - * - * def items(self): # <<<<<<<<<<<<<< - * """ - * Returns an iterator over all the (key, value) pairs in this - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_6Object_15items(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_6Object_14items, "Object.items(self)\n\n Returns an iterator over all the (key, value) pairs in this\n `Object`.\n "); -static PyMethodDef __pyx_mdef_9csimdjson_6Object_15items = {"items", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Object_15items, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Object_14items}; -static PyObject *__pyx_pw_9csimdjson_6Object_15items(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("items (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(0, 347, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("items", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "items", 0))) return NULL; - goto __pyx_L4_argument_unpacking_done; - goto __pyx_L3_error; - __pyx_L3_error:; - __Pyx_AddTraceback("csimdjson.Object.items", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_6Object_14items(((struct __pyx_obj_9csimdjson_Object *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_6Object_14items(struct __pyx_obj_9csimdjson_Object *__pyx_v_self) { - struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items *__pyx_cur_scope; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("items", 0); - __pyx_cur_scope = (struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items *)__pyx_tp_new_9csimdjson___pyx_scope_struct_3_items(__pyx_ptype_9csimdjson___pyx_scope_struct_3_items, __pyx_empty_tuple, NULL); - if (unlikely(!__pyx_cur_scope)) { - __pyx_cur_scope = ((struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items *)Py_None); - __Pyx_INCREF(Py_None); - __PYX_ERR(0, 347, __pyx_L1_error) - } else { - __Pyx_GOTREF((PyObject *)__pyx_cur_scope); - } - __pyx_cur_scope->__pyx_v_self = __pyx_v_self; - __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self); - __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); - { - __pyx_CoroutineObject *gen = __Pyx_Generator_New((__pyx_coroutine_body_t) __pyx_gb_9csimdjson_6Object_16generator3, __pyx_codeobj__12, (PyObject *) __pyx_cur_scope, __pyx_n_s_items, __pyx_n_s_Object_items, __pyx_n_s_csimdjson); if (unlikely(!gen)) __PYX_ERR(0, 347, __pyx_L1_error) - __Pyx_DECREF(__pyx_cur_scope); - __Pyx_RefNannyFinishContext(); - return (PyObject *) gen; - } - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("csimdjson.Object.items", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_DECREF((PyObject *)__pyx_cur_scope); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_gb_9csimdjson_6Object_16generator3(__pyx_CoroutineObject *__pyx_generator, CYTHON_UNUSED PyThreadState *__pyx_tstate, PyObject *__pyx_sent_value) /* generator body */ -{ - struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items *__pyx_cur_scope = ((struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items *)__pyx_generator->closure); - PyObject *__pyx_r = NULL; - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - struct __pyx_opt_args_9csimdjson_element_to_primitive __pyx_t_5; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("items", 0); - switch (__pyx_generator->resume_label) { - case 0: goto __pyx_L3_first_run; - case 1: goto __pyx_L6_resume_from_yield; - default: /* CPython raises the right error here */ - __Pyx_RefNannyFinishContext(); - return NULL; - } - __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) __PYX_ERR(0, 347, __pyx_L1_error) - - /* "csimdjson.pyx":355 - * size_t size - * const char *data - * simd_object.iterator it = self.c_element.begin() # <<<<<<<<<<<<<< - * - * while it != self.c_element.end(): - */ - __pyx_cur_scope->__pyx_v_it = __pyx_cur_scope->__pyx_v_self->c_element.begin(); - - /* "csimdjson.pyx":357 - * simd_object.iterator it = self.c_element.begin() - * - * while it != self.c_element.end(): # <<<<<<<<<<<<<< - * data = it.key_c_str() - * size = it.key_length() - */ - while (1) { - __pyx_t_1 = (__pyx_cur_scope->__pyx_v_it != __pyx_cur_scope->__pyx_v_self->c_element.end()); - if (!__pyx_t_1) break; - - /* "csimdjson.pyx":358 - * - * while it != self.c_element.end(): - * data = it.key_c_str() # <<<<<<<<<<<<<< - * size = it.key_length() - * yield ( - */ - __pyx_cur_scope->__pyx_v_data = __pyx_cur_scope->__pyx_v_it.key_c_str(); - - /* "csimdjson.pyx":359 - * while it != self.c_element.end(): - * data = it.key_c_str() - * size = it.key_length() # <<<<<<<<<<<<<< - * yield ( - * data[:size], - */ - __pyx_cur_scope->__pyx_v_size = __pyx_cur_scope->__pyx_v_it.key_length(); - - /* "csimdjson.pyx":361 - * size = it.key_length() - * yield ( - * data[:size], # <<<<<<<<<<<<<< - * element_to_primitive(self.parser, it.value(), True) - * ) - */ - __pyx_t_2 = __Pyx_PyUnicode_FromStringAndSize(__pyx_cur_scope->__pyx_v_data + 0, __pyx_cur_scope->__pyx_v_size - 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 361, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - - /* "csimdjson.pyx":362 - * yield ( - * data[:size], - * element_to_primitive(self.parser, it.value(), True) # <<<<<<<<<<<<<< - * ) - * preincrement(it) - */ - __pyx_t_3 = ((PyObject *)__pyx_cur_scope->__pyx_v_self->parser); - __Pyx_INCREF(__pyx_t_3); - __pyx_t_5.__pyx_n = 1; - __pyx_t_5.recursive = 1; - __pyx_t_4 = __pyx_f_9csimdjson_element_to_primitive(((struct __pyx_obj_9csimdjson_Parser *)__pyx_t_3), __pyx_cur_scope->__pyx_v_it.value(), &__pyx_t_5); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 362, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - - /* "csimdjson.pyx":361 - * size = it.key_length() - * yield ( - * data[:size], # <<<<<<<<<<<<<< - * element_to_primitive(self.parser, it.value(), True) - * ) - */ - __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 361, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_GIVEREF(__pyx_t_2); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_2)) __PYX_ERR(0, 361, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_4); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_4)) __PYX_ERR(0, 361, __pyx_L1_error); - __pyx_t_2 = 0; - __pyx_t_4 = 0; - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - __Pyx_Coroutine_ResetAndClearException(__pyx_generator); - /* return from generator, yielding value */ - __pyx_generator->resume_label = 1; - return __pyx_r; - __pyx_L6_resume_from_yield:; - if (unlikely(!__pyx_sent_value)) __PYX_ERR(0, 360, __pyx_L1_error) - - /* "csimdjson.pyx":364 - * element_to_primitive(self.parser, it.value(), True) - * ) - * preincrement(it) # <<<<<<<<<<<<<< - * - * def at_pointer(self, json_pointer): - */ - (void)((++__pyx_cur_scope->__pyx_v_it)); - } - CYTHON_MAYBE_UNUSED_VAR(__pyx_cur_scope); - - /* "csimdjson.pyx":347 - * preincrement(it) - * - * def items(self): # <<<<<<<<<<<<<< - * """ - * Returns an iterator over all the (key, value) pairs in this - */ - - /* function exit code */ - PyErr_SetNone(PyExc_StopIteration); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_Generator_Replace_StopIteration(0); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_AddTraceback("items", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_L0:; - __Pyx_XDECREF(__pyx_r); __pyx_r = 0; - #if !CYTHON_USE_EXC_INFO_STACK - __Pyx_Coroutine_ResetAndClearException(__pyx_generator); - #endif - __pyx_generator->resume_label = -1; - __Pyx_Coroutine_clear((PyObject*)__pyx_generator); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":366 - * preincrement(it) - * - * def at_pointer(self, json_pointer): # <<<<<<<<<<<<<< - * """Get the value at the given JSON pointer.""" - * return element_to_primitive( - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_6Object_18at_pointer(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_6Object_17at_pointer, "Object.at_pointer(self, json_pointer)\nGet the value at the given JSON pointer."); -static PyMethodDef __pyx_mdef_9csimdjson_6Object_18at_pointer = {"at_pointer", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Object_18at_pointer, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Object_17at_pointer}; -static PyObject *__pyx_pw_9csimdjson_6Object_18at_pointer(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - PyObject *__pyx_v_json_pointer = 0; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("at_pointer (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(0, 366, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_json_pointer,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_json_pointer)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 366, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "at_pointer") < 0)) __PYX_ERR(0, 366, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 1)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - } - __pyx_v_json_pointer = values[0]; - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("at_pointer", 1, 1, 1, __pyx_nargs); __PYX_ERR(0, 366, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("csimdjson.Object.at_pointer", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_6Object_17at_pointer(((struct __pyx_obj_9csimdjson_Object *)__pyx_v_self), __pyx_v_json_pointer); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_6Object_17at_pointer(struct __pyx_obj_9csimdjson_Object *__pyx_v_self, PyObject *__pyx_v_json_pointer) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - char const *__pyx_t_3; - simdjson::dom::element __pyx_t_4; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("at_pointer", 0); - - /* "csimdjson.pyx":368 - * def at_pointer(self, json_pointer): - * """Get the value at the given JSON pointer.""" - * return element_to_primitive( # <<<<<<<<<<<<<< - * self.parser, - * self.c_element.at_pointer( - */ - __Pyx_XDECREF(__pyx_r); - - /* "csimdjson.pyx":369 - * """Get the value at the given JSON pointer.""" - * return element_to_primitive( - * self.parser, # <<<<<<<<<<<<<< - * self.c_element.at_pointer( - * str_as_bytes(json_pointer) - */ - __pyx_t_1 = ((PyObject *)__pyx_v_self->parser); - __Pyx_INCREF(__pyx_t_1); - - /* "csimdjson.pyx":371 - * self.parser, - * self.c_element.at_pointer( - * str_as_bytes(json_pointer) # <<<<<<<<<<<<<< - * ) - * ) - */ - __pyx_t_2 = __pyx_f_9csimdjson_str_as_bytes(__pyx_v_json_pointer); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 371, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - if (unlikely(__pyx_t_2 == Py_None)) { - PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found"); - __PYX_ERR(0, 371, __pyx_L1_error) - } - __pyx_t_3 = __Pyx_PyBytes_AsString(__pyx_t_2); if (unlikely((!__pyx_t_3) && PyErr_Occurred())) __PYX_ERR(0, 371, __pyx_L1_error) - - /* "csimdjson.pyx":370 - * return element_to_primitive( - * self.parser, - * self.c_element.at_pointer( # <<<<<<<<<<<<<< - * str_as_bytes(json_pointer) - * ) - */ - try { - __pyx_t_4 = __pyx_v_self->c_element.at_pointer(__pyx_t_3); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 370, __pyx_L1_error) - } - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - - /* "csimdjson.pyx":368 - * def at_pointer(self, json_pointer): - * """Get the value at the given JSON pointer.""" - * return element_to_primitive( # <<<<<<<<<<<<<< - * self.parser, - * self.c_element.at_pointer( - */ - __pyx_t_2 = __pyx_f_9csimdjson_element_to_primitive(((struct __pyx_obj_9csimdjson_Parser *)__pyx_t_1), __PYX_STD_MOVE_IF_SUPPORTED(__pyx_t_4), NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 368, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":366 - * preincrement(it) - * - * def at_pointer(self, json_pointer): # <<<<<<<<<<<<<< - * """Get the value at the given JSON pointer.""" - * return element_to_primitive( - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("csimdjson.Object.at_pointer", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":375 - * ) - * - * def as_dict(self): # <<<<<<<<<<<<<< - * """ - * Convert this `Object` to a regular python dictionary, - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_6Object_20as_dict(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_6Object_19as_dict, "Object.as_dict(self)\n\n Convert this `Object` to a regular python dictionary,\n recursively converting any objects or lists it finds.\n "); -static PyMethodDef __pyx_mdef_9csimdjson_6Object_20as_dict = {"as_dict", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Object_20as_dict, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Object_19as_dict}; -static PyObject *__pyx_pw_9csimdjson_6Object_20as_dict(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("as_dict (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(0, 375, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("as_dict", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "as_dict", 0))) return NULL; - goto __pyx_L4_argument_unpacking_done; - goto __pyx_L3_error; - __pyx_L3_error:; - __Pyx_AddTraceback("csimdjson.Object.as_dict", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_6Object_19as_dict(((struct __pyx_obj_9csimdjson_Object *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_6Object_19as_dict(struct __pyx_obj_9csimdjson_Object *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("as_dict", 0); - - /* "csimdjson.pyx":380 - * recursively converting any objects or lists it finds. - * """ - * return object_to_dict(self.parser, self.c_element, True) # <<<<<<<<<<<<<< - * - * @property - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = ((PyObject *)__pyx_v_self->parser); - __Pyx_INCREF(__pyx_t_1); - __pyx_t_2 = __pyx_f_9csimdjson_object_to_dict(((struct __pyx_obj_9csimdjson_Parser *)__pyx_t_1), __pyx_v_self->c_element, 1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 380, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":375 - * ) - * - * def as_dict(self): # <<<<<<<<<<<<<< - * """ - * Convert this `Object` to a regular python dictionary, - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("csimdjson.Object.as_dict", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":382 - * return object_to_dict(self.parser, self.c_element, True) - * - * @property # <<<<<<<<<<<<<< - * def mini(self): - * """ - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_6Object_4mini_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_9csimdjson_6Object_4mini_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_9csimdjson_6Object_4mini___get__(((struct __pyx_obj_9csimdjson_Object *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_6Object_4mini___get__(struct __pyx_obj_9csimdjson_Object *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - std::string __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 0); - - /* "csimdjson.pyx":389 - * :rtype: bytes - * """ - * return minify(self.c_element) # <<<<<<<<<<<<<< - * - * - */ - __Pyx_XDECREF(__pyx_r); - try { - __pyx_t_1 = simdjson::minify(__pyx_v_self->c_element); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 389, __pyx_L1_error) - } - __pyx_t_2 = __pyx_convert_PyBytes_string_to_py_std__in_string(__pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 389, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_INCREF(((PyObject*)__pyx_t_2)); - __pyx_r = __pyx_t_2; - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":382 - * return object_to_dict(self.parser, self.c_element, True) - * - * @property # <<<<<<<<<<<<<< - * def mini(self): - * """ - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_AddTraceback("csimdjson.Object.mini.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":283 - * Python objects. - * """ - * cdef readonly Parser parser # <<<<<<<<<<<<<< - * cdef simd_object c_element - * cdef shared_ptr[simd_parser] c_parser - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_6Object_6parser_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_9csimdjson_6Object_6parser_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_9csimdjson_6Object_6parser___get__(((struct __pyx_obj_9csimdjson_Object *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_6Object_6parser___get__(struct __pyx_obj_9csimdjson_Object *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__", 0); - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF((PyObject *)__pyx_v_self->parser); - __pyx_r = ((PyObject *)__pyx_v_self->parser); - goto __pyx_L0; - - /* function exit code */ - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - * def __setstate_cython__(self, __pyx_state): - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_6Object_22__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_6Object_21__reduce_cython__, "Object.__reduce_cython__(self)"); -static PyMethodDef __pyx_mdef_9csimdjson_6Object_22__reduce_cython__ = {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Object_22__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Object_21__reduce_cython__}; -static PyObject *__pyx_pw_9csimdjson_6Object_22__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__reduce_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 1, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("__reduce_cython__", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__reduce_cython__", 0))) return NULL; - goto __pyx_L4_argument_unpacking_done; - goto __pyx_L3_error; - __pyx_L3_error:; - __Pyx_AddTraceback("csimdjson.Object.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_6Object_21__reduce_cython__(((struct __pyx_obj_9csimdjson_Object *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_6Object_21__reduce_cython__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_Object *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__reduce_cython__", 0); - - /* "(tree fragment)":2 - * def __reduce_cython__(self): - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" # <<<<<<<<<<<<<< - * def __setstate_cython__(self, __pyx_state): - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - */ - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_self_c_element_self_c_parser_can, 0, 0); - __PYX_ERR(1, 2, __pyx_L1_error) - - /* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - * def __setstate_cython__(self, __pyx_state): - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("csimdjson.Object.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_6Object_24__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_6Object_23__setstate_cython__, "Object.__setstate_cython__(self, __pyx_state)"); -static PyMethodDef __pyx_mdef_9csimdjson_6Object_24__setstate_cython__ = {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Object_24__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Object_23__setstate_cython__}; -static PyObject *__pyx_pw_9csimdjson_6Object_24__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - CYTHON_UNUSED PyObject *__pyx_v___pyx_state = 0; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__setstate_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 3, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_pyx_state,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pyx_state)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 3, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__setstate_cython__") < 0)) __PYX_ERR(1, 3, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 1)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - } - __pyx_v___pyx_state = values[0]; - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__setstate_cython__", 1, 1, 1, __pyx_nargs); __PYX_ERR(1, 3, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("csimdjson.Object.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_6Object_23__setstate_cython__(((struct __pyx_obj_9csimdjson_Object *)__pyx_v_self), __pyx_v___pyx_state); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_6Object_23__setstate_cython__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_Object *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__setstate_cython__", 0); - - /* "(tree fragment)":4 - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - * def __setstate_cython__(self, __pyx_state): - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" # <<<<<<<<<<<<<< - */ - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_self_c_element_self_c_parser_can, 0, 0); - __PYX_ERR(1, 4, __pyx_L1_error) - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("csimdjson.Object.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":404 - * cdef shared_ptr[simd_parser] c_parser - * - * def __cinit__(self, size_t max_capacity=SIMDJSON_MAXSIZE_BYTES): # <<<<<<<<<<<<<< - * self.c_parser = make_shared[simd_parser](max_capacity) - * - */ - -/* Python wrapper */ -static int __pyx_pw_9csimdjson_6Parser_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static int __pyx_pw_9csimdjson_6Parser_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - size_t __pyx_v_max_capacity; - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0); - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(0, 404, __pyx_L3_error) - #endif - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_max_capacity,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_VARARGS(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (kw_args > 0) { - PyObject* value = __Pyx_GetKwValue_VARARGS(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_max_capacity); - if (value) { values[0] = __Pyx_Arg_NewRef_VARARGS(value); kw_args--; } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 404, __pyx_L3_error) - } - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__cinit__") < 0)) __PYX_ERR(0, 404, __pyx_L3_error) - } - } else { - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_VARARGS(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - } - if (values[0]) { - __pyx_v_max_capacity = __Pyx_PyInt_As_size_t(values[0]); if (unlikely((__pyx_v_max_capacity == (size_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 404, __pyx_L3_error) - } else { - __pyx_v_max_capacity = __pyx_k__13; - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 0, 1, __pyx_nargs); __PYX_ERR(0, 404, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("csimdjson.Parser.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return -1; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_6Parser___cinit__(((struct __pyx_obj_9csimdjson_Parser *)__pyx_v_self), __pyx_v_max_capacity); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_VARARGS(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_9csimdjson_6Parser___cinit__(struct __pyx_obj_9csimdjson_Parser *__pyx_v_self, size_t __pyx_v_max_capacity) { - int __pyx_r; - __Pyx_RefNannyDeclarations - std::shared_ptr __pyx_t_1; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__cinit__", 0); - - /* "csimdjson.pyx":405 - * - * def __cinit__(self, size_t max_capacity=SIMDJSON_MAXSIZE_BYTES): - * self.c_parser = make_shared[simd_parser](max_capacity) # <<<<<<<<<<<<<< - * - * def __dealloc__(self): - */ - try { - __pyx_t_1 = std::make_shared(__pyx_v_max_capacity); - } catch(...) { - __Pyx_CppExn2PyErr(); - __PYX_ERR(0, 405, __pyx_L1_error) - } - __pyx_v_self->c_parser = __PYX_STD_MOVE_IF_SUPPORTED(__pyx_t_1); - - /* "csimdjson.pyx":404 - * cdef shared_ptr[simd_parser] c_parser - * - * def __cinit__(self, size_t max_capacity=SIMDJSON_MAXSIZE_BYTES): # <<<<<<<<<<<<<< - * self.c_parser = make_shared[simd_parser](max_capacity) - * - */ - - /* function exit code */ - __pyx_r = 0; - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_AddTraceback("csimdjson.Parser.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":407 - * self.c_parser = make_shared[simd_parser](max_capacity) - * - * def __dealloc__(self): # <<<<<<<<<<<<<< - * self.c_parser.reset() - * - */ - -/* Python wrapper */ -static void __pyx_pw_9csimdjson_6Parser_3__dealloc__(PyObject *__pyx_v_self); /*proto*/ -static void __pyx_pw_9csimdjson_6Parser_3__dealloc__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__dealloc__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_pf_9csimdjson_6Parser_2__dealloc__(((struct __pyx_obj_9csimdjson_Parser *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); -} - -static void __pyx_pf_9csimdjson_6Parser_2__dealloc__(struct __pyx_obj_9csimdjson_Parser *__pyx_v_self) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__dealloc__", 0); - - /* "csimdjson.pyx":408 - * - * def __dealloc__(self): - * self.c_parser.reset() # <<<<<<<<<<<<<< - * - * def parse(self, src not None, bint recursive=False): - */ - __pyx_v_self->c_parser.reset(); - - /* "csimdjson.pyx":407 - * self.c_parser = make_shared[simd_parser](max_capacity) - * - * def __dealloc__(self): # <<<<<<<<<<<<<< - * self.c_parser.reset() - * - */ - - /* function exit code */ - __Pyx_RefNannyFinishContext(); -} - -/* "csimdjson.pyx":410 - * self.c_parser.reset() - * - * def parse(self, src not None, bint recursive=False): # <<<<<<<<<<<<<< - * """Parse the given JSON document. - * - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_6Parser_5parse(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_6Parser_4parse, "Parser.parse(self, src, bool recursive=False)\nParse the given JSON document.\n\n The source document may be a `str`, `bytes`, `bytearray`, or any other\n object that implements the buffer protocol.\n\n .. admonition:: Performance\n :class: tip\n\n While you can pass quite a few things to this method to be parsed,\n simple ``bytes`` will almost always be the fastest.\n\n If any :class:`~Object` or :class:`~Array` proxies still pointing to\n a previously-parsed document exist when this method is called, a\n ``RuntimeError`` may be raised.\n\n :param src: The document to parse.\n :param recursive: Recursively turn the document into real\n python objects instead of pysimdjson proxies.\n [default: False]\n "); -static PyMethodDef __pyx_mdef_9csimdjson_6Parser_5parse = {"parse", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Parser_5parse, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Parser_4parse}; -static PyObject *__pyx_pw_9csimdjson_6Parser_5parse(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - PyObject *__pyx_v_src = 0; - int __pyx_v_recursive; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[2] = {0,0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("parse (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(0, 410, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_src,&__pyx_n_s_recursive,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); - CYTHON_FALLTHROUGH; - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_src)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 410, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - CYTHON_FALLTHROUGH; - case 1: - if (kw_args > 0) { - PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_recursive); - if (value) { values[1] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 410, __pyx_L3_error) - } - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "parse") < 0)) __PYX_ERR(0, 410, __pyx_L3_error) - } - } else { - switch (__pyx_nargs) { - case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); - CYTHON_FALLTHROUGH; - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - __pyx_v_src = values[0]; - if (values[1]) { - __pyx_v_recursive = __Pyx_PyObject_IsTrue(values[1]); if (unlikely((__pyx_v_recursive == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 410, __pyx_L3_error) - } else { - __pyx_v_recursive = ((int)0); - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("parse", 0, 1, 2, __pyx_nargs); __PYX_ERR(0, 410, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("csimdjson.Parser.parse", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - if (unlikely(((PyObject *)__pyx_v_src) == Py_None)) { - PyErr_Format(PyExc_TypeError, "Argument '%.200s' must not be None", "src"); __PYX_ERR(0, 410, __pyx_L1_error) - } - __pyx_r = __pyx_pf_9csimdjson_6Parser_4parse(((struct __pyx_obj_9csimdjson_Parser *)__pyx_v_self), __pyx_v_src, __pyx_v_recursive); - - /* function exit code */ - goto __pyx_L0; - __pyx_L1_error:; - __pyx_r = NULL; - __pyx_L0:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_6Parser_4parse(struct __pyx_obj_9csimdjson_Parser *__pyx_v_self, PyObject *__pyx_v_src, int __pyx_v_recursive) { - __Pyx_memviewslice __pyx_v_data = { 0, 0, { 0 }, { 0 }, { 0 } }; - char const *__pyx_v_str_data; - char *__pyx_v_bytes_data; - Py_ssize_t __pyx_v_str_size; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - int __pyx_t_3; - simdjson::dom::element __pyx_t_4; - struct __pyx_opt_args_9csimdjson_element_to_primitive __pyx_t_5; - __Pyx_memviewslice __pyx_t_6 = { 0, 0, { 0 }, { 0 }, { 0 } }; - PyObject *__pyx_t_7 = NULL; - Py_ssize_t __pyx_t_8; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("parse", 0); - - /* "csimdjson.pyx":434 - * # may not occur until much later than expected by a user. We may need - * # to recommend against re-use on PyPy. - * if self.c_parser.use_count() > 1: # <<<<<<<<<<<<<< - * raise RuntimeError( - * 'Tried to re-use a parser while simdjson.Object and/or' - */ - __pyx_t_1 = (__pyx_v_self->c_parser.use_count() > 1); - if (unlikely(__pyx_t_1)) { - - /* "csimdjson.pyx":435 - * # to recommend against re-use on PyPy. - * if self.c_parser.use_count() > 1: - * raise RuntimeError( # <<<<<<<<<<<<<< - * 'Tried to re-use a parser while simdjson.Object and/or' - * ' simdjson.Array objects still exist referencing the old' - */ - __pyx_t_2 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__14, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 435, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_Raise(__pyx_t_2, 0, 0, 0); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __PYX_ERR(0, 435, __pyx_L1_error) - - /* "csimdjson.pyx":434 - * # may not occur until much later than expected by a user. We may need - * # to recommend against re-use on PyPy. - * if self.c_parser.use_count() > 1: # <<<<<<<<<<<<<< - * raise RuntimeError( - * 'Tried to re-use a parser while simdjson.Object and/or' - */ - } - - /* "csimdjson.pyx":443 - * cdef: - * const unsigned char[::1] data - * const char * str_data = NULL # <<<<<<<<<<<<<< - * char * bytes_data = NULL - * Py_ssize_t str_size = 0 - */ - __pyx_v_str_data = NULL; - - /* "csimdjson.pyx":444 - * const unsigned char[::1] data - * const char * str_data = NULL - * char * bytes_data = NULL # <<<<<<<<<<<<<< - * Py_ssize_t str_size = 0 - * - */ - __pyx_v_bytes_data = NULL; - - /* "csimdjson.pyx":445 - * const char * str_data = NULL - * char * bytes_data = NULL - * Py_ssize_t str_size = 0 # <<<<<<<<<<<<<< - * - * if isinstance(src, bytes): - */ - __pyx_v_str_size = 0; - - /* "csimdjson.pyx":447 - * Py_ssize_t str_size = 0 - * - * if isinstance(src, bytes): # <<<<<<<<<<<<<< - * # Handling bytes is drastically faster than using the buffer API. - * PyBytes_AsStringAndSize(src, &bytes_data, &str_size) - */ - __pyx_t_1 = PyBytes_Check(__pyx_v_src); - if (__pyx_t_1) { - - /* "csimdjson.pyx":449 - * if isinstance(src, bytes): - * # Handling bytes is drastically faster than using the buffer API. - * PyBytes_AsStringAndSize(src, &bytes_data, &str_size) # <<<<<<<<<<<<<< - * return element_to_primitive( - * self, - */ - __pyx_t_3 = PyBytes_AsStringAndSize(__pyx_v_src, (&__pyx_v_bytes_data), (&__pyx_v_str_size)); if (unlikely(__pyx_t_3 == ((int)-1))) __PYX_ERR(0, 449, __pyx_L1_error) - - /* "csimdjson.pyx":450 - * # Handling bytes is drastically faster than using the buffer API. - * PyBytes_AsStringAndSize(src, &bytes_data, &str_size) - * return element_to_primitive( # <<<<<<<<<<<<<< - * self, - * dereference(self.c_parser).parse( - */ - __Pyx_XDECREF(__pyx_r); - - /* "csimdjson.pyx":452 - * return element_to_primitive( - * self, - * dereference(self.c_parser).parse( # <<<<<<<<<<<<<< - * bytes_data, - * str_size, - */ - try { - __pyx_t_4 = (*__pyx_v_self->c_parser).parse(__pyx_v_bytes_data, __pyx_v_str_size, 1); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 452, __pyx_L1_error) - } - - /* "csimdjson.pyx":450 - * # Handling bytes is drastically faster than using the buffer API. - * PyBytes_AsStringAndSize(src, &bytes_data, &str_size) - * return element_to_primitive( # <<<<<<<<<<<<<< - * self, - * dereference(self.c_parser).parse( - */ - __pyx_t_5.__pyx_n = 1; - __pyx_t_5.recursive = __pyx_v_recursive; - __pyx_t_2 = __pyx_f_9csimdjson_element_to_primitive(__pyx_v_self, __PYX_STD_MOVE_IF_SUPPORTED(__pyx_t_4), &__pyx_t_5); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 450, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":447 - * Py_ssize_t str_size = 0 - * - * if isinstance(src, bytes): # <<<<<<<<<<<<<< - * # Handling bytes is drastically faster than using the buffer API. - * PyBytes_AsStringAndSize(src, &bytes_data, &str_size) - */ - } - - /* "csimdjson.pyx":459 - * recursive - * ) - * elif isinstance(src, str): # <<<<<<<<<<<<<< - * # str can't be handled using the buffer API, oddly, even if you - * # know the encoding. - */ - __pyx_t_1 = PyUnicode_Check(__pyx_v_src); - if (__pyx_t_1) { - - /* "csimdjson.pyx":462 - * # str can't be handled using the buffer API, oddly, even if you - * # know the encoding. - * str_data = PyUnicode_AsUTF8AndSize(src, &str_size) # <<<<<<<<<<<<<< - * return element_to_primitive( - * self, - */ - __pyx_v_str_data = PyUnicode_AsUTF8AndSize(__pyx_v_src, (&__pyx_v_str_size)); - - /* "csimdjson.pyx":463 - * # know the encoding. - * str_data = PyUnicode_AsUTF8AndSize(src, &str_size) - * return element_to_primitive( # <<<<<<<<<<<<<< - * self, - * dereference(self.c_parser).parse( - */ - __Pyx_XDECREF(__pyx_r); - - /* "csimdjson.pyx":465 - * return element_to_primitive( - * self, - * dereference(self.c_parser).parse( # <<<<<<<<<<<<<< - * str_data, - * str_size, - */ - try { - __pyx_t_4 = (*__pyx_v_self->c_parser).parse(__pyx_v_str_data, __pyx_v_str_size, 1); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 465, __pyx_L1_error) - } - - /* "csimdjson.pyx":463 - * # know the encoding. - * str_data = PyUnicode_AsUTF8AndSize(src, &str_size) - * return element_to_primitive( # <<<<<<<<<<<<<< - * self, - * dereference(self.c_parser).parse( - */ - __pyx_t_5.__pyx_n = 1; - __pyx_t_5.recursive = __pyx_v_recursive; - __pyx_t_2 = __pyx_f_9csimdjson_element_to_primitive(__pyx_v_self, __PYX_STD_MOVE_IF_SUPPORTED(__pyx_t_4), &__pyx_t_5); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 463, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_r = __pyx_t_2; - __pyx_t_2 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":459 - * recursive - * ) - * elif isinstance(src, str): # <<<<<<<<<<<<<< - * # str can't be handled using the buffer API, oddly, even if you - * # know the encoding. - */ - } - - /* "csimdjson.pyx":476 - * # memoryview, etc). This is significantly slower than the - * # type-specific APIs, but gives much greater compatibility. - * data = src # <<<<<<<<<<<<<< - * - * if data.size == 0: - */ - /*else*/ { - __pyx_t_6 = __Pyx_PyObject_to_MemoryviewSlice_dc_unsigned_char__const__(__pyx_v_src, 0); if (unlikely(!__pyx_t_6.memview)) __PYX_ERR(0, 476, __pyx_L1_error) - __pyx_v_data = __pyx_t_6; - __pyx_t_6.memview = NULL; - __pyx_t_6.data = NULL; - - /* "csimdjson.pyx":478 - * data = src - * - * if data.size == 0: # <<<<<<<<<<<<<< - * # If we were given a completely empty buffer, trying to access - * # a stride in the next step will cause a (potentially - */ - __pyx_t_2 = __pyx_memoryview_fromslice(__pyx_v_data, 1, (PyObject *(*)(char *)) __pyx_memview_get_unsigned_char__const__, (int (*)(char *, PyObject *)) NULL, 0);; if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 478, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_size); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 478, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_1 = (__Pyx_PyInt_BoolEqObjC(__pyx_t_7, __pyx_int_0, 0, 0)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 478, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - if (unlikely(__pyx_t_1)) { - - /* "csimdjson.pyx":483 - * # confusing) IndexError. This isn't a very good error message, - * # but it's identical to the one simdjson would have raised. - * raise ValueError('EMPTY: no JSON found') # <<<<<<<<<<<<<< - * - * return element_to_primitive( - */ - __pyx_t_7 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__15, NULL); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 483, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __Pyx_Raise(__pyx_t_7, 0, 0, 0); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __PYX_ERR(0, 483, __pyx_L1_error) - - /* "csimdjson.pyx":478 - * data = src - * - * if data.size == 0: # <<<<<<<<<<<<<< - * # If we were given a completely empty buffer, trying to access - * # a stride in the next step will cause a (potentially - */ - } - - /* "csimdjson.pyx":485 - * raise ValueError('EMPTY: no JSON found') - * - * return element_to_primitive( # <<<<<<<<<<<<<< - * self, - * dereference(self.c_parser).parse( - */ - __Pyx_XDECREF(__pyx_r); - - /* "csimdjson.pyx":488 - * self, - * dereference(self.c_parser).parse( - * &data[0], # <<<<<<<<<<<<<< - * data.shape[0], - * True - */ - __pyx_t_8 = 0; - __pyx_t_3 = -1; - if (__pyx_t_8 < 0) { - __pyx_t_8 += __pyx_v_data.shape[0]; - if (unlikely(__pyx_t_8 < 0)) __pyx_t_3 = 0; - } else if (unlikely(__pyx_t_8 >= __pyx_v_data.shape[0])) __pyx_t_3 = 0; - if (unlikely(__pyx_t_3 != -1)) { - __Pyx_RaiseBufferIndexError(__pyx_t_3); - __PYX_ERR(0, 488, __pyx_L1_error) - } - - /* "csimdjson.pyx":487 - * return element_to_primitive( - * self, - * dereference(self.c_parser).parse( # <<<<<<<<<<<<<< - * &data[0], - * data.shape[0], - */ - try { - __pyx_t_4 = (*__pyx_v_self->c_parser).parse(((char const *)(&(*((unsigned char const *) ( /* dim=0 */ ((char *) (((unsigned char const *) __pyx_v_data.data) + __pyx_t_8)) ))))), (__pyx_v_data.shape[0]), 1); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 487, __pyx_L1_error) - } - - /* "csimdjson.pyx":485 - * raise ValueError('EMPTY: no JSON found') - * - * return element_to_primitive( # <<<<<<<<<<<<<< - * self, - * dereference(self.c_parser).parse( - */ - __pyx_t_5.__pyx_n = 1; - __pyx_t_5.recursive = __pyx_v_recursive; - __pyx_t_7 = __pyx_f_9csimdjson_element_to_primitive(__pyx_v_self, __PYX_STD_MOVE_IF_SUPPORTED(__pyx_t_4), &__pyx_t_5); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 485, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __pyx_r = __pyx_t_7; - __pyx_t_7 = 0; - goto __pyx_L0; - } - - /* "csimdjson.pyx":410 - * self.c_parser.reset() - * - * def parse(self, src not None, bint recursive=False): # <<<<<<<<<<<<<< - * """Parse the given JSON document. - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __PYX_XCLEAR_MEMVIEW(&__pyx_t_6, 1); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_AddTraceback("csimdjson.Parser.parse", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __PYX_XCLEAR_MEMVIEW(&__pyx_v_data, 1); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":495 - * ) - * - * def load(self, path, bint recursive=False): # <<<<<<<<<<<<<< - * """Load a JSON document from the file system path `path`. - * - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_6Parser_7load(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_6Parser_6load, "Parser.load(self, path, bool recursive=False)\nLoad a JSON document from the file system path `path`.\n\n If any :class:`~Object` or :class:`~Array` proxies still pointing to\n a previously-parsed document exist when this method is called, a\n `RuntimeError` may be raised.\n\n :param path: A filesystem path.\n :param recursive: Recursively turn the document into real\n python objects instead of pysimdjson proxies.\n "); -static PyMethodDef __pyx_mdef_9csimdjson_6Parser_7load = {"load", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Parser_7load, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Parser_6load}; -static PyObject *__pyx_pw_9csimdjson_6Parser_7load(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - PyObject *__pyx_v_path = 0; - int __pyx_v_recursive; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[2] = {0,0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("load (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(0, 495, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_path,&__pyx_n_s_recursive,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); - CYTHON_FALLTHROUGH; - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_path)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 495, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - CYTHON_FALLTHROUGH; - case 1: - if (kw_args > 0) { - PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_recursive); - if (value) { values[1] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 495, __pyx_L3_error) - } - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "load") < 0)) __PYX_ERR(0, 495, __pyx_L3_error) - } - } else { - switch (__pyx_nargs) { - case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); - CYTHON_FALLTHROUGH; - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - break; - default: goto __pyx_L5_argtuple_error; - } - } - __pyx_v_path = values[0]; - if (values[1]) { - __pyx_v_recursive = __Pyx_PyObject_IsTrue(values[1]); if (unlikely((__pyx_v_recursive == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 495, __pyx_L3_error) - } else { - __pyx_v_recursive = ((int)0); - } - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("load", 0, 1, 2, __pyx_nargs); __PYX_ERR(0, 495, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("csimdjson.Parser.load", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_6Parser_6load(((struct __pyx_obj_9csimdjson_Parser *)__pyx_v_self), __pyx_v_path, __pyx_v_recursive); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_6Parser_6load(struct __pyx_obj_9csimdjson_Parser *__pyx_v_self, PyObject *__pyx_v_path, int __pyx_v_recursive) { - simdjson::dom::element __pyx_v_document; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_t_1; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - int __pyx_t_5; - char const *__pyx_t_6; - simdjson::dom::element __pyx_t_7; - struct __pyx_opt_args_9csimdjson_element_to_primitive __pyx_t_8; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("load", 0); - __Pyx_INCREF(__pyx_v_path); - - /* "csimdjson.pyx":506 - * python objects instead of pysimdjson proxies. - * """ - * if self.c_parser.use_count() > 1: # <<<<<<<<<<<<<< - * raise RuntimeError( - * 'Tried to re-use a parser while simdjson.Object and/or' - */ - __pyx_t_1 = (__pyx_v_self->c_parser.use_count() > 1); - if (unlikely(__pyx_t_1)) { - - /* "csimdjson.pyx":507 - * """ - * if self.c_parser.use_count() > 1: - * raise RuntimeError( # <<<<<<<<<<<<<< - * 'Tried to re-use a parser while simdjson.Object and/or' - * ' simdjson.Array objects still exist referencing the old' - */ - __pyx_t_2 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__14, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 507, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_Raise(__pyx_t_2, 0, 0, 0); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __PYX_ERR(0, 507, __pyx_L1_error) - - /* "csimdjson.pyx":506 - * python objects instead of pysimdjson proxies. - * """ - * if self.c_parser.use_count() > 1: # <<<<<<<<<<<<<< - * raise RuntimeError( - * 'Tried to re-use a parser while simdjson.Object and/or' - */ - } - - /* "csimdjson.pyx":513 - * ) - * - * if isinstance(path, unicode): # <<<<<<<<<<<<<< - * path = (path).encode('utf-8') - * elif isinstance(path, pathlib.Path): - */ - __pyx_t_1 = PyUnicode_Check(__pyx_v_path); - if (__pyx_t_1) { - - /* "csimdjson.pyx":514 - * - * if isinstance(path, unicode): - * path = (path).encode('utf-8') # <<<<<<<<<<<<<< - * elif isinstance(path, pathlib.Path): - * path = str(path).encode('utf-8') - */ - if (unlikely(__pyx_v_path == Py_None)) { - PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "encode"); - __PYX_ERR(0, 514, __pyx_L1_error) - } - __pyx_t_2 = PyUnicode_AsUTF8String(((PyObject*)__pyx_v_path)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 514, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF_SET(__pyx_v_path, __pyx_t_2); - __pyx_t_2 = 0; - - /* "csimdjson.pyx":513 - * ) - * - * if isinstance(path, unicode): # <<<<<<<<<<<<<< - * path = (path).encode('utf-8') - * elif isinstance(path, pathlib.Path): - */ - goto __pyx_L4; - } - - /* "csimdjson.pyx":515 - * if isinstance(path, unicode): - * path = (path).encode('utf-8') - * elif isinstance(path, pathlib.Path): # <<<<<<<<<<<<<< - * path = str(path).encode('utf-8') - * - */ - __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_pathlib); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 515, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_Path); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 515, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_1 = PyObject_IsInstance(__pyx_v_path, __pyx_t_3); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(0, 515, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (__pyx_t_1) { - - /* "csimdjson.pyx":516 - * path = (path).encode('utf-8') - * elif isinstance(path, pathlib.Path): - * path = str(path).encode('utf-8') # <<<<<<<<<<<<<< - * - * cdef simd_element document = dereference(self.c_parser).load(path) - */ - __pyx_t_2 = __Pyx_PyObject_Str(__pyx_v_path); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 516, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_encode); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 516, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = NULL; - __pyx_t_5 = 0; - #if CYTHON_UNPACK_METHODS - if (likely(PyMethod_Check(__pyx_t_4))) { - __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_4); - if (likely(__pyx_t_2)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4); - __Pyx_INCREF(__pyx_t_2); - __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_4, function); - __pyx_t_5 = 1; - } - } - #endif - { - PyObject *__pyx_callargs[2] = {__pyx_t_2, __pyx_kp_u_utf_8}; - __pyx_t_3 = __Pyx_PyObject_FastCall(__pyx_t_4, __pyx_callargs+1-__pyx_t_5, 1+__pyx_t_5); - __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 516, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - } - __Pyx_DECREF_SET(__pyx_v_path, __pyx_t_3); - __pyx_t_3 = 0; - - /* "csimdjson.pyx":515 - * if isinstance(path, unicode): - * path = (path).encode('utf-8') - * elif isinstance(path, pathlib.Path): # <<<<<<<<<<<<<< - * path = str(path).encode('utf-8') - * - */ - } - __pyx_L4:; - - /* "csimdjson.pyx":518 - * path = str(path).encode('utf-8') - * - * cdef simd_element document = dereference(self.c_parser).load(path) # <<<<<<<<<<<<<< - * return element_to_primitive(self, document, recursive) - * - */ - __pyx_t_6 = __Pyx_PyObject_AsString(__pyx_v_path); if (unlikely((!__pyx_t_6) && PyErr_Occurred())) __PYX_ERR(0, 518, __pyx_L1_error) - try { - __pyx_t_7 = (*__pyx_v_self->c_parser).load(__pyx_t_6); - } catch(...) { - simdjson_error_handler(); if (!PyErr_Occurred())PyErr_SetString(PyExc_RuntimeError, "Error converting c++ exception."); - __PYX_ERR(0, 518, __pyx_L1_error) - } - __pyx_v_document = __PYX_STD_MOVE_IF_SUPPORTED(__pyx_t_7); - - /* "csimdjson.pyx":519 - * - * cdef simd_element document = dereference(self.c_parser).load(path) - * return element_to_primitive(self, document, recursive) # <<<<<<<<<<<<<< - * - * def get_implementations(self, supported_by_runtime=True): - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_8.__pyx_n = 1; - __pyx_t_8.recursive = __pyx_v_recursive; - __pyx_t_3 = __pyx_f_9csimdjson_element_to_primitive(__pyx_v_self, __pyx_v_document, &__pyx_t_8); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 519, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":495 - * ) - * - * def load(self, path, bint recursive=False): # <<<<<<<<<<<<<< - * """Load a JSON document from the file system path `path`. - * - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_XDECREF(__pyx_t_4); - __Pyx_AddTraceback("csimdjson.Parser.load", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XDECREF(__pyx_v_path); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} -static PyObject *__pyx_gb_9csimdjson_6Parser_10generator4(__pyx_CoroutineObject *__pyx_generator, CYTHON_UNUSED PyThreadState *__pyx_tstate, PyObject *__pyx_sent_value); /* proto */ - -/* "csimdjson.pyx":521 - * return element_to_primitive(self, document, recursive) - * - * def get_implementations(self, supported_by_runtime=True): # <<<<<<<<<<<<<< - * """ - * A list of available parser implementations in the form of [(name, - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_6Parser_9get_implementations(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_6Parser_8get_implementations, "Parser.get_implementations(self, supported_by_runtime=True)\n\n A list of available parser implementations in the form of [(name,\n description),\342\200\246].\n\n By default, this only returns the implementations that are usable on\n the current runtime. Setting `supported_by_runtime` to False will\n instead return all the implementations _compiled_ into this build of\n simdjson.\n "); -static PyMethodDef __pyx_mdef_9csimdjson_6Parser_9get_implementations = {"get_implementations", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Parser_9get_implementations, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Parser_8get_implementations}; -static PyObject *__pyx_pw_9csimdjson_6Parser_9get_implementations(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - PyObject *__pyx_v_supported_by_runtime = 0; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("get_implementations (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(0, 521, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_supported_by_runtime,0}; - values[0] = __Pyx_Arg_NewRef_FASTCALL(((PyObject *)Py_True)); - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (kw_args > 0) { - PyObject* value = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_supported_by_runtime); - if (value) { values[0] = __Pyx_Arg_NewRef_FASTCALL(value); kw_args--; } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 521, __pyx_L3_error) - } - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "get_implementations") < 0)) __PYX_ERR(0, 521, __pyx_L3_error) - } - } else { - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - } - __pyx_v_supported_by_runtime = values[0]; - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("get_implementations", 0, 0, 1, __pyx_nargs); __PYX_ERR(0, 521, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("csimdjson.Parser.get_implementations", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_6Parser_8get_implementations(((struct __pyx_obj_9csimdjson_Parser *)__pyx_v_self), __pyx_v_supported_by_runtime); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_6Parser_8get_implementations(CYTHON_UNUSED struct __pyx_obj_9csimdjson_Parser *__pyx_v_self, PyObject *__pyx_v_supported_by_runtime) { - struct __pyx_obj_9csimdjson___pyx_scope_struct_4_get_implementations *__pyx_cur_scope; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("get_implementations", 0); - __pyx_cur_scope = (struct __pyx_obj_9csimdjson___pyx_scope_struct_4_get_implementations *)__pyx_tp_new_9csimdjson___pyx_scope_struct_4_get_implementations(__pyx_ptype_9csimdjson___pyx_scope_struct_4_get_implementations, __pyx_empty_tuple, NULL); - if (unlikely(!__pyx_cur_scope)) { - __pyx_cur_scope = ((struct __pyx_obj_9csimdjson___pyx_scope_struct_4_get_implementations *)Py_None); - __Pyx_INCREF(Py_None); - __PYX_ERR(0, 521, __pyx_L1_error) - } else { - __Pyx_GOTREF((PyObject *)__pyx_cur_scope); - } - __pyx_cur_scope->__pyx_v_self = __pyx_v_self; - __Pyx_INCREF((PyObject *)__pyx_cur_scope->__pyx_v_self); - __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); - __pyx_cur_scope->__pyx_v_supported_by_runtime = __pyx_v_supported_by_runtime; - __Pyx_INCREF(__pyx_cur_scope->__pyx_v_supported_by_runtime); - __Pyx_GIVEREF(__pyx_cur_scope->__pyx_v_supported_by_runtime); - { - __pyx_CoroutineObject *gen = __Pyx_Generator_New((__pyx_coroutine_body_t) __pyx_gb_9csimdjson_6Parser_10generator4, __pyx_codeobj__16, (PyObject *) __pyx_cur_scope, __pyx_n_s_get_implementations, __pyx_n_s_Parser_get_implementations, __pyx_n_s_csimdjson); if (unlikely(!gen)) __PYX_ERR(0, 521, __pyx_L1_error) - __Pyx_DECREF(__pyx_cur_scope); - __Pyx_RefNannyFinishContext(); - return (PyObject *) gen; - } - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("csimdjson.Parser.get_implementations", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_DECREF((PyObject *)__pyx_cur_scope); - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_gb_9csimdjson_6Parser_10generator4(__pyx_CoroutineObject *__pyx_generator, CYTHON_UNUSED PyThreadState *__pyx_tstate, PyObject *__pyx_sent_value) /* generator body */ -{ - struct __pyx_obj_9csimdjson___pyx_scope_struct_4_get_implementations *__pyx_cur_scope = ((struct __pyx_obj_9csimdjson___pyx_scope_struct_4_get_implementations *)__pyx_generator->closure); - PyObject *__pyx_r = NULL; - simdjson::implementation const *const *__pyx_t_1; - simdjson::internal::available_implementation_list const *__pyx_t_2; - simdjson::implementation const *__pyx_t_3; - int __pyx_t_4; - int __pyx_t_5; - PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - PyObject *__pyx_t_8 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("get_implementations", 0); - switch (__pyx_generator->resume_label) { - case 0: goto __pyx_L3_first_run; - case 1: goto __pyx_L9_resume_from_yield; - default: /* CPython raises the right error here */ - __Pyx_RefNannyFinishContext(); - return NULL; - } - __pyx_L3_first_run:; - if (unlikely(!__pyx_sent_value)) __PYX_ERR(0, 521, __pyx_L1_error) - - /* "csimdjson.pyx":531 - * simdjson. - * """ - * for impl in get_available_implementations(): # <<<<<<<<<<<<<< - * if supported_by_runtime and not impl.supported_by_runtime_system(): - * continue - */ - __pyx_t_2 = &simdjson::get_available_implementations(); - __pyx_t_1 = __pyx_t_2->begin(); - for (;;) { - if (!(__pyx_t_1 != __pyx_t_2->end())) break; - __pyx_t_3 = *__pyx_t_1; - ++__pyx_t_1; - __pyx_cur_scope->__pyx_v_impl = __pyx_t_3; - - /* "csimdjson.pyx":532 - * """ - * for impl in get_available_implementations(): - * if supported_by_runtime and not impl.supported_by_runtime_system(): # <<<<<<<<<<<<<< - * continue - * - */ - __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_cur_scope->__pyx_v_supported_by_runtime); if (unlikely((__pyx_t_5 < 0))) __PYX_ERR(0, 532, __pyx_L1_error) - if (__pyx_t_5) { - } else { - __pyx_t_4 = __pyx_t_5; - goto __pyx_L7_bool_binop_done; - } - __pyx_t_5 = (!__pyx_cur_scope->__pyx_v_impl->supported_by_runtime_system()); - __pyx_t_4 = __pyx_t_5; - __pyx_L7_bool_binop_done:; - if (__pyx_t_4) { - - /* "csimdjson.pyx":533 - * for impl in get_available_implementations(): - * if supported_by_runtime and not impl.supported_by_runtime_system(): - * continue # <<<<<<<<<<<<<< - * - * yield impl.name(), impl.description() - */ - goto __pyx_L4_continue; - - /* "csimdjson.pyx":532 - * """ - * for impl in get_available_implementations(): - * if supported_by_runtime and not impl.supported_by_runtime_system(): # <<<<<<<<<<<<<< - * continue - * - */ - } - - /* "csimdjson.pyx":535 - * continue - * - * yield impl.name(), impl.description() # <<<<<<<<<<<<<< - * - * @property - */ - __pyx_t_6 = __pyx_convert_PyUnicode_string_to_py_std__in_string(__pyx_cur_scope->__pyx_v_impl->name()); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 535, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = __pyx_convert_PyUnicode_string_to_py_std__in_string(__pyx_cur_scope->__pyx_v_impl->description()); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 535, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_8 = PyTuple_New(2); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 535, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_8); - __Pyx_GIVEREF(__pyx_t_6); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_6)) __PYX_ERR(0, 535, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_7); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_8, 1, __pyx_t_7)) __PYX_ERR(0, 535, __pyx_L1_error); - __pyx_t_6 = 0; - __pyx_t_7 = 0; - __pyx_r = __pyx_t_8; - __pyx_t_8 = 0; - __pyx_cur_scope->__pyx_t_0 = __pyx_t_1; - __pyx_cur_scope->__pyx_t_1 = __pyx_t_2; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - __Pyx_Coroutine_ResetAndClearException(__pyx_generator); - /* return from generator, yielding value */ - __pyx_generator->resume_label = 1; - return __pyx_r; - __pyx_L9_resume_from_yield:; - __pyx_t_1 = __pyx_cur_scope->__pyx_t_0; - __pyx_t_2 = __pyx_cur_scope->__pyx_t_1; - if (unlikely(!__pyx_sent_value)) __PYX_ERR(0, 535, __pyx_L1_error) - - /* "csimdjson.pyx":531 - * simdjson. - * """ - * for impl in get_available_implementations(): # <<<<<<<<<<<<<< - * if supported_by_runtime and not impl.supported_by_runtime_system(): - * continue - */ - __pyx_L4_continue:; - } - CYTHON_MAYBE_UNUSED_VAR(__pyx_cur_scope); - - /* "csimdjson.pyx":521 - * return element_to_primitive(self, document, recursive) - * - * def get_implementations(self, supported_by_runtime=True): # <<<<<<<<<<<<<< - * """ - * A list of available parser implementations in the form of [(name, - */ - - /* function exit code */ - PyErr_SetNone(PyExc_StopIteration); - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_Generator_Replace_StopIteration(0); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_XDECREF(__pyx_t_8); - __Pyx_AddTraceback("get_implementations", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_L0:; - __Pyx_XDECREF(__pyx_r); __pyx_r = 0; - #if !CYTHON_USE_EXC_INFO_STACK - __Pyx_Coroutine_ResetAndClearException(__pyx_generator); - #endif - __pyx_generator->resume_label = -1; - __Pyx_Coroutine_clear((PyObject*)__pyx_generator); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":537 - * yield impl.name(), impl.description() - * - * @property # <<<<<<<<<<<<<< - * def implementation(self): - * """ - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_6Parser_14implementation_1__get__(PyObject *__pyx_v_self); /*proto*/ -static PyObject *__pyx_pw_9csimdjson_6Parser_14implementation_1__get__(PyObject *__pyx_v_self) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__get__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_9csimdjson_6Parser_14implementation___get__(((struct __pyx_obj_9csimdjson_Parser *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_6Parser_14implementation___get__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_Parser *__pyx_v_self) { - simdjson::implementation const *__pyx_v_impl; - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__get__", 0); - - /* "csimdjson.pyx":549 - * """ - * cdef const Implementation * impl = ( - * get_active_implementation() # <<<<<<<<<<<<<< - * ) - * return impl.name(), impl.description() - */ - __pyx_v_impl = ((simdjson::implementation const *)simdjson::get_active_implementation()); - - /* "csimdjson.pyx":551 - * get_active_implementation() - * ) - * return impl.name(), impl.description() # <<<<<<<<<<<<<< - * - * @implementation.setter - */ - __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __pyx_convert_PyUnicode_string_to_py_std__in_string(__pyx_v_impl->name()); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 551, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __pyx_convert_PyUnicode_string_to_py_std__in_string(__pyx_v_impl->description()); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 551, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 551, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); - __Pyx_GIVEREF(__pyx_t_1); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_1)) __PYX_ERR(0, 551, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_t_2); - if (__Pyx_PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_2)) __PYX_ERR(0, 551, __pyx_L1_error); - __pyx_t_1 = 0; - __pyx_t_2 = 0; - __pyx_r = __pyx_t_3; - __pyx_t_3 = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":537 - * yield impl.name(), impl.description() - * - * @property # <<<<<<<<<<<<<< - * def implementation(self): - * """ - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_2); - __Pyx_XDECREF(__pyx_t_3); - __Pyx_AddTraceback("csimdjson.Parser.implementation.__get__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __pyx_L0:; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "csimdjson.pyx":553 - * return impl.name(), impl.description() - * - * @implementation.setter # <<<<<<<<<<<<<< - * def implementation(self, name): - * for impl in get_available_implementations(): - */ - -/* Python wrapper */ -static int __pyx_pw_9csimdjson_6Parser_14implementation_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_name); /*proto*/ -static int __pyx_pw_9csimdjson_6Parser_14implementation_3__set__(PyObject *__pyx_v_self, PyObject *__pyx_v_name) { - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_r; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__set__ (wrapper)", 0); - __pyx_kwvalues = __Pyx_KwValues_VARARGS(__pyx_args, __pyx_nargs); - __pyx_r = __pyx_pf_9csimdjson_6Parser_14implementation_2__set__(((struct __pyx_obj_9csimdjson_Parser *)__pyx_v_self), ((PyObject *)__pyx_v_name)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static int __pyx_pf_9csimdjson_6Parser_14implementation_2__set__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_Parser *__pyx_v_self, PyObject *__pyx_v_name) { - simdjson::implementation const *__pyx_v_impl; - int __pyx_r; - __Pyx_RefNannyDeclarations - simdjson::implementation const *const *__pyx_t_1; - simdjson::internal::available_implementation_list const *__pyx_t_2; - simdjson::implementation const *__pyx_t_3; - PyObject *__pyx_t_4 = NULL; - char const *__pyx_t_5; - int __pyx_t_6; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__set__", 0); - - /* "csimdjson.pyx":555 - * @implementation.setter - * def implementation(self, name): - * for impl in get_available_implementations(): # <<<<<<<<<<<<<< - * if impl.name() != str_as_bytes(name): - * continue - */ - __pyx_t_2 = &simdjson::get_available_implementations(); - __pyx_t_1 = __pyx_t_2->begin(); - for (;;) { - if (!(__pyx_t_1 != __pyx_t_2->end())) break; - __pyx_t_3 = *__pyx_t_1; - ++__pyx_t_1; - __pyx_v_impl = __pyx_t_3; - - /* "csimdjson.pyx":556 - * def implementation(self, name): - * for impl in get_available_implementations(): - * if impl.name() != str_as_bytes(name): # <<<<<<<<<<<<<< - * continue - * - */ - __pyx_t_4 = __pyx_f_9csimdjson_str_as_bytes(__pyx_v_name); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 556, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (unlikely(__pyx_t_4 == Py_None)) { - PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found"); - __PYX_ERR(0, 556, __pyx_L1_error) - } - __pyx_t_5 = __Pyx_PyBytes_AsString(__pyx_t_4); if (unlikely((!__pyx_t_5) && PyErr_Occurred())) __PYX_ERR(0, 556, __pyx_L1_error) - __pyx_t_6 = (__pyx_v_impl->name() != __pyx_t_5); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (__pyx_t_6) { - - /* "csimdjson.pyx":557 - * for impl in get_available_implementations(): - * if impl.name() != str_as_bytes(name): - * continue # <<<<<<<<<<<<<< - * - * if not impl.supported_by_runtime_system(): - */ - goto __pyx_L3_continue; - - /* "csimdjson.pyx":556 - * def implementation(self, name): - * for impl in get_available_implementations(): - * if impl.name() != str_as_bytes(name): # <<<<<<<<<<<<<< - * continue - * - */ - } - - /* "csimdjson.pyx":559 - * continue - * - * if not impl.supported_by_runtime_system(): # <<<<<<<<<<<<<< - * raise RuntimeError( - * 'Attempted to set a runtime Implementation that is not' - */ - __pyx_t_6 = (!__pyx_v_impl->supported_by_runtime_system()); - if (unlikely(__pyx_t_6)) { - - /* "csimdjson.pyx":560 - * - * if not impl.supported_by_runtime_system(): - * raise RuntimeError( # <<<<<<<<<<<<<< - * 'Attempted to set a runtime Implementation that is not' - * 'supported on the current host.' - */ - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__17, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 560, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_Raise(__pyx_t_4, 0, 0, 0); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __PYX_ERR(0, 560, __pyx_L1_error) - - /* "csimdjson.pyx":559 - * continue - * - * if not impl.supported_by_runtime_system(): # <<<<<<<<<<<<<< - * raise RuntimeError( - * 'Attempted to set a runtime Implementation that is not' - */ - } - - /* "csimdjson.pyx":565 - * ) - * - * set_active_implementation(impl) # <<<<<<<<<<<<<< - * return - * - */ - set_active_implementation(__pyx_v_impl); - - /* "csimdjson.pyx":566 - * - * set_active_implementation(impl) - * return # <<<<<<<<<<<<<< - * - * raise ValueError('Unknown Implementation') - */ - __pyx_r = 0; - goto __pyx_L0; - - /* "csimdjson.pyx":555 - * @implementation.setter - * def implementation(self, name): - * for impl in get_available_implementations(): # <<<<<<<<<<<<<< - * if impl.name() != str_as_bytes(name): - * continue - */ - __pyx_L3_continue:; - } - - /* "csimdjson.pyx":568 - * return - * - * raise ValueError('Unknown Implementation') # <<<<<<<<<<<<<< - */ - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__18, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 568, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_Raise(__pyx_t_4, 0, 0, 0); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __PYX_ERR(0, 568, __pyx_L1_error) - - /* "csimdjson.pyx":553 - * return impl.name(), impl.description() - * - * @implementation.setter # <<<<<<<<<<<<<< - * def implementation(self, name): - * for impl in get_available_implementations(): - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_4); - __Pyx_AddTraceback("csimdjson.Parser.implementation.__set__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = -1; - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_6Parser_12__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_6Parser_11__reduce_cython__, "Parser.__reduce_cython__(self)"); -static PyMethodDef __pyx_mdef_9csimdjson_6Parser_12__reduce_cython__ = {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Parser_12__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Parser_11__reduce_cython__}; -static PyObject *__pyx_pw_9csimdjson_6Parser_12__reduce_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__reduce_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 1, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - if (unlikely(__pyx_nargs > 0)) { - __Pyx_RaiseArgtupleInvalid("__reduce_cython__", 1, 0, 0, __pyx_nargs); return NULL;} - if (unlikely(__pyx_kwds) && __Pyx_NumKwargs_FASTCALL(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "__reduce_cython__", 0))) return NULL; - goto __pyx_L4_argument_unpacking_done; - goto __pyx_L3_error; - __pyx_L3_error:; - __Pyx_AddTraceback("csimdjson.Parser.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_6Parser_11__reduce_cython__(((struct __pyx_obj_9csimdjson_Parser *)__pyx_v_self)); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_6Parser_11__reduce_cython__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_Parser *__pyx_v_self) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__reduce_cython__", 0); - - /* "(tree fragment)":2 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<< - * def __setstate_cython__(self, __pyx_state): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); - __PYX_ERR(1, 2, __pyx_L1_error) - - /* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("csimdjson.Parser.__reduce_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -/* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - -/* Python wrapper */ -static PyObject *__pyx_pw_9csimdjson_6Parser_14__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -); /*proto*/ -PyDoc_STRVAR(__pyx_doc_9csimdjson_6Parser_13__setstate_cython__, "Parser.__setstate_cython__(self, __pyx_state)"); -static PyMethodDef __pyx_mdef_9csimdjson_6Parser_14__setstate_cython__ = {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Parser_14__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Parser_13__setstate_cython__}; -static PyObject *__pyx_pw_9csimdjson_6Parser_14__setstate_cython__(PyObject *__pyx_v_self, -#if CYTHON_METH_FASTCALL -PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds -#else -PyObject *__pyx_args, PyObject *__pyx_kwds -#endif -) { - CYTHON_UNUSED PyObject *__pyx_v___pyx_state = 0; - #if !CYTHON_METH_FASTCALL - CYTHON_UNUSED Py_ssize_t __pyx_nargs; - #endif - CYTHON_UNUSED PyObject *const *__pyx_kwvalues; - PyObject* values[1] = {0}; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__setstate_cython__ (wrapper)", 0); - #if !CYTHON_METH_FASTCALL - #if CYTHON_ASSUME_SAFE_MACROS - __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); - #else - __pyx_nargs = PyTuple_Size(__pyx_args); - if (unlikely((__pyx_nargs < 0))) __PYX_ERR(1, 3, __pyx_L3_error) - #endif - #endif - __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); - { - PyObject **__pyx_pyargnames[] = {&__pyx_n_s_pyx_state,0}; - if (__pyx_kwds) { - Py_ssize_t kw_args; - switch (__pyx_nargs) { - case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - CYTHON_FALLTHROUGH; - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); - switch (__pyx_nargs) { - case 0: - if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_pyx_state)) != 0)) { - (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); - kw_args--; - } - else if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 3, __pyx_L3_error) - else goto __pyx_L5_argtuple_error; - } - if (unlikely(kw_args > 0)) { - const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "__setstate_cython__") < 0)) __PYX_ERR(1, 3, __pyx_L3_error) - } - } else if (unlikely(__pyx_nargs != 1)) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); - } - __pyx_v___pyx_state = values[0]; - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__setstate_cython__", 1, 1, 1, __pyx_nargs); __PYX_ERR(1, 3, __pyx_L3_error) - goto __pyx_L3_error; - __pyx_L3_error:; - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_AddTraceback("csimdjson.Parser.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_9csimdjson_6Parser_13__setstate_cython__(((struct __pyx_obj_9csimdjson_Parser *)__pyx_v_self), __pyx_v___pyx_state); - - /* function exit code */ - { - Py_ssize_t __pyx_temp; - for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { - __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); - } - } - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_9csimdjson_6Parser_13__setstate_cython__(CYTHON_UNUSED struct __pyx_obj_9csimdjson_Parser *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state) { - PyObject *__pyx_r = NULL; - __Pyx_RefNannyDeclarations - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__setstate_cython__", 0); - - /* "(tree fragment)":4 - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" # <<<<<<<<<<<<<< - */ - __Pyx_Raise(__pyx_builtin_TypeError, __pyx_kp_s_no_default___reduce___due_to_non, 0, 0); - __PYX_ERR(1, 4, __pyx_L1_error) - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - - /* function exit code */ - __pyx_L1_error:; - __Pyx_AddTraceback("csimdjson.Parser.__setstate_cython__", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; - __Pyx_XGIVEREF(__pyx_r); - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} -static struct __pyx_vtabstruct_9csimdjson_ArrayBuffer __pyx_vtable_9csimdjson_ArrayBuffer; - -static PyObject *__pyx_tp_new_9csimdjson_ArrayBuffer(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { - struct __pyx_obj_9csimdjson_ArrayBuffer *p; - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - if (likely(!__Pyx_PyType_HasFeature(t, Py_TPFLAGS_IS_ABSTRACT))) { - o = (*t->tp_alloc)(t, 0); - } else { - o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0); - } - if (unlikely(!o)) return 0; - #endif - p = ((struct __pyx_obj_9csimdjson_ArrayBuffer *)o); - p->__pyx_vtab = __pyx_vtabptr_9csimdjson_ArrayBuffer; - if (unlikely(__pyx_pw_9csimdjson_11ArrayBuffer_1__cinit__(o, __pyx_empty_tuple, NULL) < 0)) goto bad; - return o; - bad: - Py_DECREF(o); o = 0; - return NULL; -} - -static void __pyx_tp_dealloc_9csimdjson_ArrayBuffer(PyObject *o) { - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && (!PyType_IS_GC(Py_TYPE(o)) || !__Pyx_PyObject_GC_IsFinalized(o))) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_9csimdjson_ArrayBuffer) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - { - PyObject *etype, *eval, *etb; - PyErr_Fetch(&etype, &eval, &etb); - __Pyx_SET_REFCNT(o, Py_REFCNT(o) + 1); - __pyx_pw_9csimdjson_11ArrayBuffer_3__dealloc__(o); - __Pyx_SET_REFCNT(o, Py_REFCNT(o) - 1); - PyErr_Restore(etype, eval, etb); - } - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) tp_free(o); - } - #endif -} - -static PyObject *__pyx_getprop_9csimdjson_11ArrayBuffer_size(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_9csimdjson_11ArrayBuffer_4size_1__get__(o); -} - -static PyMethodDef __pyx_methods_9csimdjson_ArrayBuffer[] = { - {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_11ArrayBuffer_9__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_11ArrayBuffer_8__reduce_cython__}, - {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_11ArrayBuffer_11__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_11ArrayBuffer_10__setstate_cython__}, - {0, 0, 0, 0} -}; - -static struct PyGetSetDef __pyx_getsets_9csimdjson_ArrayBuffer[] = { - {(char *)"size", __pyx_getprop_9csimdjson_11ArrayBuffer_size, 0, (char *)0, 0}, - {0, 0, 0, 0, 0} -}; -#if CYTHON_USE_TYPE_SPECS -#if !CYTHON_COMPILING_IN_LIMITED_API - -static PyBufferProcs __pyx_tp_as_buffer_ArrayBuffer = { - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getreadbuffer*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getwritebuffer*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getsegcount*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getcharbuffer*/ - #endif - __pyx_pw_9csimdjson_11ArrayBuffer_5__getbuffer__, /*bf_getbuffer*/ - __pyx_pw_9csimdjson_11ArrayBuffer_7__releasebuffer__, /*bf_releasebuffer*/ -}; -#endif -static PyType_Slot __pyx_type_9csimdjson_ArrayBuffer_slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc_9csimdjson_ArrayBuffer}, - #if defined(Py_bf_getbuffer) - {Py_bf_getbuffer, (void *)__pyx_pw_9csimdjson_11ArrayBuffer_5__getbuffer__}, - #endif - #if defined(Py_bf_releasebuffer) - {Py_bf_releasebuffer, (void *)__pyx_pw_9csimdjson_11ArrayBuffer_7__releasebuffer__}, - #endif - {Py_tp_doc, (void *)PyDoc_STR("\n A container for the flattened data of a homogeneous :class:`Array`.\n\n .. admonition::\n :class: note\n\n This object is responsible for keeping the contents of an Array alive\n even after the simdjson Parser has been reused or destroyed.\n\n .. admonition::\n :class: warning\n\n You should never create this class on your own. It is created and\n returned for you by :func:`Array.as_buffer`.\n ")}, - {Py_tp_methods, (void *)__pyx_methods_9csimdjson_ArrayBuffer}, - {Py_tp_getset, (void *)__pyx_getsets_9csimdjson_ArrayBuffer}, - {Py_tp_new, (void *)__pyx_tp_new_9csimdjson_ArrayBuffer}, - {0, 0}, -}; -static PyType_Spec __pyx_type_9csimdjson_ArrayBuffer_spec = { - "csimdjson.ArrayBuffer", - sizeof(struct __pyx_obj_9csimdjson_ArrayBuffer), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE, - __pyx_type_9csimdjson_ArrayBuffer_slots, -}; -#else - -static PyBufferProcs __pyx_tp_as_buffer_ArrayBuffer = { - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getreadbuffer*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getwritebuffer*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getsegcount*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getcharbuffer*/ - #endif - __pyx_pw_9csimdjson_11ArrayBuffer_5__getbuffer__, /*bf_getbuffer*/ - __pyx_pw_9csimdjson_11ArrayBuffer_7__releasebuffer__, /*bf_releasebuffer*/ -}; - -static PyTypeObject __pyx_type_9csimdjson_ArrayBuffer = { - PyVarObject_HEAD_INIT(0, 0) - "csimdjson.""ArrayBuffer", /*tp_name*/ - sizeof(struct __pyx_obj_9csimdjson_ArrayBuffer), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_9csimdjson_ArrayBuffer, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - &__pyx_tp_as_buffer_ArrayBuffer, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE, /*tp_flags*/ - PyDoc_STR("\n A container for the flattened data of a homogeneous :class:`Array`.\n\n .. admonition::\n :class: note\n\n This object is responsible for keeping the contents of an Array alive\n even after the simdjson Parser has been reused or destroyed.\n\n .. admonition::\n :class: warning\n\n You should never create this class on your own. It is created and\n returned for you by :func:`Array.as_buffer`.\n "), /*tp_doc*/ - 0, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - __pyx_methods_9csimdjson_ArrayBuffer, /*tp_methods*/ - 0, /*tp_members*/ - __pyx_getsets_9csimdjson_ArrayBuffer, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - 0, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_9csimdjson_ArrayBuffer, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if __PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif -static struct __pyx_vtabstruct_9csimdjson_Array __pyx_vtable_9csimdjson_Array; - -static PyObject *__pyx_tp_new_9csimdjson_Array(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { - struct __pyx_obj_9csimdjson_Array *p; - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - if (likely(!__Pyx_PyType_HasFeature(t, Py_TPFLAGS_IS_ABSTRACT))) { - o = (*t->tp_alloc)(t, 0); - } else { - o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0); - } - if (unlikely(!o)) return 0; - #endif - p = ((struct __pyx_obj_9csimdjson_Array *)o); - p->__pyx_vtab = __pyx_vtabptr_9csimdjson_Array; - new((void*)&(p->c_element)) simdjson::dom::array(); - new((void*)&(p->c_parser)) std::shared_ptr (); - p->parser = ((struct __pyx_obj_9csimdjson_Parser *)Py_None); Py_INCREF(Py_None); - return o; -} - -static void __pyx_tp_dealloc_9csimdjson_Array(PyObject *o) { - struct __pyx_obj_9csimdjson_Array *p = (struct __pyx_obj_9csimdjson_Array *)o; - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_9csimdjson_Array) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - PyObject_GC_UnTrack(o); - __Pyx_call_destructor(p->c_element); - __Pyx_call_destructor(p->c_parser); - Py_CLEAR(p->parser); - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) tp_free(o); - } - #endif -} - -static int __pyx_tp_traverse_9csimdjson_Array(PyObject *o, visitproc v, void *a) { - int e; - struct __pyx_obj_9csimdjson_Array *p = (struct __pyx_obj_9csimdjson_Array *)o; - if (p->parser) { - e = (*v)(((PyObject *)p->parser), a); if (e) return e; - } - return 0; -} - -static int __pyx_tp_clear_9csimdjson_Array(PyObject *o) { - PyObject* tmp; - struct __pyx_obj_9csimdjson_Array *p = (struct __pyx_obj_9csimdjson_Array *)o; - tmp = ((PyObject*)p->parser); - p->parser = ((struct __pyx_obj_9csimdjson_Parser *)Py_None); Py_INCREF(Py_None); - Py_XDECREF(tmp); - return 0; -} -static PyObject *__pyx_sq_item_9csimdjson_Array(PyObject *o, Py_ssize_t i) { - PyObject *r; - PyObject *x = PyInt_FromSsize_t(i); if(!x) return 0; - r = Py_TYPE(o)->tp_as_mapping->mp_subscript(o, x); - Py_DECREF(x); - return r; -} - -static PyObject *__pyx_getprop_9csimdjson_5Array_mini(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_9csimdjson_5Array_4mini_1__get__(o); -} - -static PyObject *__pyx_getprop_9csimdjson_5Array_parser(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_9csimdjson_5Array_6parser_1__get__(o); -} - -static PyMethodDef __pyx_methods_9csimdjson_Array[] = { - {"at_pointer", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_5Array_8at_pointer, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_5Array_7at_pointer}, - {"as_list", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_5Array_10as_list, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_5Array_9as_list}, - {"as_buffer", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_5Array_12as_buffer, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_5Array_11as_buffer}, - {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_5Array_14__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_5Array_13__reduce_cython__}, - {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_5Array_16__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_5Array_15__setstate_cython__}, - {0, 0, 0, 0} -}; - -static struct PyGetSetDef __pyx_getsets_9csimdjson_Array[] = { - {(char *)"mini", __pyx_getprop_9csimdjson_5Array_mini, 0, (char *)PyDoc_STR("\n Returns the minified JSON representation of this Array.\n\n :rtype: bytes\n "), 0}, - {(char *)"parser", __pyx_getprop_9csimdjson_5Array_parser, 0, (char *)0, 0}, - {0, 0, 0, 0, 0} -}; -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_type_9csimdjson_Array_slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc_9csimdjson_Array}, - {Py_sq_length, (void *)__pyx_pw_9csimdjson_5Array_3__len__}, - {Py_sq_item, (void *)__pyx_sq_item_9csimdjson_Array}, - {Py_mp_length, (void *)__pyx_pw_9csimdjson_5Array_3__len__}, - {Py_mp_subscript, (void *)__pyx_pw_9csimdjson_5Array_1__getitem__}, - {Py_tp_doc, (void *)PyDoc_STR("A proxy object that behaves much like a real `list()`.\n\n Python objects are not created until an element in the list is accessed.\n When you only need a subset of an Array, this can be much faster than\n converting an entire array (and all of its children) into real Python\n objects.\n ")}, - {Py_tp_traverse, (void *)__pyx_tp_traverse_9csimdjson_Array}, - {Py_tp_clear, (void *)__pyx_tp_clear_9csimdjson_Array}, - {Py_tp_iter, (void *)__pyx_pw_9csimdjson_5Array_5__iter__}, - {Py_tp_methods, (void *)__pyx_methods_9csimdjson_Array}, - {Py_tp_getset, (void *)__pyx_getsets_9csimdjson_Array}, - {Py_tp_new, (void *)__pyx_tp_new_9csimdjson_Array}, - {0, 0}, -}; -static PyType_Spec __pyx_type_9csimdjson_Array_spec = { - "csimdjson.Array", - sizeof(struct __pyx_obj_9csimdjson_Array), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, - __pyx_type_9csimdjson_Array_slots, -}; -#else - -static PySequenceMethods __pyx_tp_as_sequence_Array = { - __pyx_pw_9csimdjson_5Array_3__len__, /*sq_length*/ - 0, /*sq_concat*/ - 0, /*sq_repeat*/ - __pyx_sq_item_9csimdjson_Array, /*sq_item*/ - 0, /*sq_slice*/ - 0, /*sq_ass_item*/ - 0, /*sq_ass_slice*/ - 0, /*sq_contains*/ - 0, /*sq_inplace_concat*/ - 0, /*sq_inplace_repeat*/ -}; - -static PyMappingMethods __pyx_tp_as_mapping_Array = { - __pyx_pw_9csimdjson_5Array_3__len__, /*mp_length*/ - __pyx_pw_9csimdjson_5Array_1__getitem__, /*mp_subscript*/ - 0, /*mp_ass_subscript*/ -}; - -static PyTypeObject __pyx_type_9csimdjson_Array = { - PyVarObject_HEAD_INIT(0, 0) - "csimdjson.""Array", /*tp_name*/ - sizeof(struct __pyx_obj_9csimdjson_Array), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_9csimdjson_Array, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - &__pyx_tp_as_sequence_Array, /*tp_as_sequence*/ - &__pyx_tp_as_mapping_Array, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ - PyDoc_STR("A proxy object that behaves much like a real `list()`.\n\n Python objects are not created until an element in the list is accessed.\n When you only need a subset of an Array, this can be much faster than\n converting an entire array (and all of its children) into real Python\n objects.\n "), /*tp_doc*/ - __pyx_tp_traverse_9csimdjson_Array, /*tp_traverse*/ - __pyx_tp_clear_9csimdjson_Array, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - __pyx_pw_9csimdjson_5Array_5__iter__, /*tp_iter*/ - 0, /*tp_iternext*/ - __pyx_methods_9csimdjson_Array, /*tp_methods*/ - 0, /*tp_members*/ - __pyx_getsets_9csimdjson_Array, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - 0, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_9csimdjson_Array, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if __PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif -static struct __pyx_vtabstruct_9csimdjson_Object __pyx_vtable_9csimdjson_Object; - -static PyObject *__pyx_tp_new_9csimdjson_Object(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { - struct __pyx_obj_9csimdjson_Object *p; - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - if (likely(!__Pyx_PyType_HasFeature(t, Py_TPFLAGS_IS_ABSTRACT))) { - o = (*t->tp_alloc)(t, 0); - } else { - o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0); - } - if (unlikely(!o)) return 0; - #endif - p = ((struct __pyx_obj_9csimdjson_Object *)o); - p->__pyx_vtab = __pyx_vtabptr_9csimdjson_Object; - new((void*)&(p->c_element)) simdjson::dom::object(); - new((void*)&(p->c_parser)) std::shared_ptr (); - p->parser = ((struct __pyx_obj_9csimdjson_Parser *)Py_None); Py_INCREF(Py_None); - return o; -} - -static void __pyx_tp_dealloc_9csimdjson_Object(PyObject *o) { - struct __pyx_obj_9csimdjson_Object *p = (struct __pyx_obj_9csimdjson_Object *)o; - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_9csimdjson_Object) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - PyObject_GC_UnTrack(o); - __Pyx_call_destructor(p->c_element); - __Pyx_call_destructor(p->c_parser); - Py_CLEAR(p->parser); - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) tp_free(o); - } - #endif -} - -static int __pyx_tp_traverse_9csimdjson_Object(PyObject *o, visitproc v, void *a) { - int e; - struct __pyx_obj_9csimdjson_Object *p = (struct __pyx_obj_9csimdjson_Object *)o; - if (p->parser) { - e = (*v)(((PyObject *)p->parser), a); if (e) return e; - } - return 0; -} - -static int __pyx_tp_clear_9csimdjson_Object(PyObject *o) { - PyObject* tmp; - struct __pyx_obj_9csimdjson_Object *p = (struct __pyx_obj_9csimdjson_Object *)o; - tmp = ((PyObject*)p->parser); - p->parser = ((struct __pyx_obj_9csimdjson_Parser *)Py_None); Py_INCREF(Py_None); - Py_XDECREF(tmp); - return 0; -} -static PyObject *__pyx_sq_item_9csimdjson_Object(PyObject *o, Py_ssize_t i) { - PyObject *r; - PyObject *x = PyInt_FromSsize_t(i); if(!x) return 0; - r = Py_TYPE(o)->tp_as_mapping->mp_subscript(o, x); - Py_DECREF(x); - return r; -} - -static PyObject *__pyx_getprop_9csimdjson_6Object_mini(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_9csimdjson_6Object_4mini_1__get__(o); -} - -static PyObject *__pyx_getprop_9csimdjson_6Object_parser(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_9csimdjson_6Object_6parser_1__get__(o); -} - -static PyMethodDef __pyx_methods_9csimdjson_Object[] = { - {"get", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Object_3get, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Object_2get}, - {"values", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Object_12values, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Object_11values}, - {"items", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Object_15items, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Object_14items}, - {"at_pointer", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Object_18at_pointer, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Object_17at_pointer}, - {"as_dict", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Object_20as_dict, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Object_19as_dict}, - {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Object_22__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Object_21__reduce_cython__}, - {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Object_24__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Object_23__setstate_cython__}, - {0, 0, 0, 0} -}; - -static struct PyGetSetDef __pyx_getsets_9csimdjson_Object[] = { - {(char *)"mini", __pyx_getprop_9csimdjson_6Object_mini, 0, (char *)PyDoc_STR("\n Returns the minified JSON representation of this Object.\n\n :rtype: bytes\n "), 0}, - {(char *)"parser", __pyx_getprop_9csimdjson_6Object_parser, 0, (char *)0, 0}, - {0, 0, 0, 0, 0} -}; -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_type_9csimdjson_Object_slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc_9csimdjson_Object}, - {Py_sq_length, (void *)__pyx_pw_9csimdjson_6Object_5__len__}, - {Py_sq_item, (void *)__pyx_sq_item_9csimdjson_Object}, - {Py_sq_contains, (void *)__pyx_pw_9csimdjson_6Object_7__contains__}, - {Py_mp_length, (void *)__pyx_pw_9csimdjson_6Object_5__len__}, - {Py_mp_subscript, (void *)__pyx_pw_9csimdjson_6Object_1__getitem__}, - {Py_tp_doc, (void *)PyDoc_STR("A proxy object that behaves much like a real `dict()`.\n\n Python objects are not created until an element in the Object\n is accessed. When you only need a subset of an Object, this can be much\n faster than converting an entire Object (and all of its children) into real\n Python objects.\n ")}, - {Py_tp_traverse, (void *)__pyx_tp_traverse_9csimdjson_Object}, - {Py_tp_clear, (void *)__pyx_tp_clear_9csimdjson_Object}, - {Py_tp_iter, (void *)__pyx_pw_9csimdjson_6Object_9__iter__}, - {Py_tp_methods, (void *)__pyx_methods_9csimdjson_Object}, - {Py_tp_getset, (void *)__pyx_getsets_9csimdjson_Object}, - {Py_tp_new, (void *)__pyx_tp_new_9csimdjson_Object}, - {0, 0}, -}; -static PyType_Spec __pyx_type_9csimdjson_Object_spec = { - "csimdjson.Object", - sizeof(struct __pyx_obj_9csimdjson_Object), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, - __pyx_type_9csimdjson_Object_slots, -}; -#else - -static PySequenceMethods __pyx_tp_as_sequence_Object = { - __pyx_pw_9csimdjson_6Object_5__len__, /*sq_length*/ - 0, /*sq_concat*/ - 0, /*sq_repeat*/ - __pyx_sq_item_9csimdjson_Object, /*sq_item*/ - 0, /*sq_slice*/ - 0, /*sq_ass_item*/ - 0, /*sq_ass_slice*/ - __pyx_pw_9csimdjson_6Object_7__contains__, /*sq_contains*/ - 0, /*sq_inplace_concat*/ - 0, /*sq_inplace_repeat*/ -}; - -static PyMappingMethods __pyx_tp_as_mapping_Object = { - __pyx_pw_9csimdjson_6Object_5__len__, /*mp_length*/ - __pyx_pw_9csimdjson_6Object_1__getitem__, /*mp_subscript*/ - 0, /*mp_ass_subscript*/ -}; - -static PyTypeObject __pyx_type_9csimdjson_Object = { - PyVarObject_HEAD_INIT(0, 0) - "csimdjson.""Object", /*tp_name*/ - sizeof(struct __pyx_obj_9csimdjson_Object), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_9csimdjson_Object, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - &__pyx_tp_as_sequence_Object, /*tp_as_sequence*/ - &__pyx_tp_as_mapping_Object, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ - PyDoc_STR("A proxy object that behaves much like a real `dict()`.\n\n Python objects are not created until an element in the Object\n is accessed. When you only need a subset of an Object, this can be much\n faster than converting an entire Object (and all of its children) into real\n Python objects.\n "), /*tp_doc*/ - __pyx_tp_traverse_9csimdjson_Object, /*tp_traverse*/ - __pyx_tp_clear_9csimdjson_Object, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - __pyx_pw_9csimdjson_6Object_9__iter__, /*tp_iter*/ - 0, /*tp_iternext*/ - __pyx_methods_9csimdjson_Object, /*tp_methods*/ - 0, /*tp_members*/ - __pyx_getsets_9csimdjson_Object, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - 0, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_9csimdjson_Object, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if __PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif - -static PyObject *__pyx_tp_new_9csimdjson_Parser(PyTypeObject *t, PyObject *a, PyObject *k) { - struct __pyx_obj_9csimdjson_Parser *p; - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - if (likely(!__Pyx_PyType_HasFeature(t, Py_TPFLAGS_IS_ABSTRACT))) { - o = (*t->tp_alloc)(t, 0); - } else { - o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0); - } - if (unlikely(!o)) return 0; - #endif - p = ((struct __pyx_obj_9csimdjson_Parser *)o); - new((void*)&(p->c_parser)) std::shared_ptr (); - if (unlikely(__pyx_pw_9csimdjson_6Parser_1__cinit__(o, a, k) < 0)) goto bad; - return o; - bad: - Py_DECREF(o); o = 0; - return NULL; -} - -static void __pyx_tp_dealloc_9csimdjson_Parser(PyObject *o) { - struct __pyx_obj_9csimdjson_Parser *p = (struct __pyx_obj_9csimdjson_Parser *)o; - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && (!PyType_IS_GC(Py_TYPE(o)) || !__Pyx_PyObject_GC_IsFinalized(o))) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_9csimdjson_Parser) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - { - PyObject *etype, *eval, *etb; - PyErr_Fetch(&etype, &eval, &etb); - __Pyx_SET_REFCNT(o, Py_REFCNT(o) + 1); - __pyx_pw_9csimdjson_6Parser_3__dealloc__(o); - __Pyx_SET_REFCNT(o, Py_REFCNT(o) - 1); - PyErr_Restore(etype, eval, etb); - } - __Pyx_call_destructor(p->c_parser); - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) tp_free(o); - } - #endif -} - -static PyObject *__pyx_getprop_9csimdjson_6Parser_implementation(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_9csimdjson_6Parser_14implementation_1__get__(o); -} - -static int __pyx_setprop_9csimdjson_6Parser_implementation(PyObject *o, PyObject *v, CYTHON_UNUSED void *x) { - if (v) { - return __pyx_pw_9csimdjson_6Parser_14implementation_3__set__(o, v); - } - else { - PyErr_SetString(PyExc_NotImplementedError, "__del__"); - return -1; - } -} - -static PyMethodDef __pyx_methods_9csimdjson_Parser[] = { - {"parse", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Parser_5parse, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Parser_4parse}, - {"load", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Parser_7load, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Parser_6load}, - {"get_implementations", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Parser_9get_implementations, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Parser_8get_implementations}, - {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Parser_12__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Parser_11__reduce_cython__}, - {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_9csimdjson_6Parser_14__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_9csimdjson_6Parser_13__setstate_cython__}, - {0, 0, 0, 0} -}; - -static struct PyGetSetDef __pyx_getsets_9csimdjson_Parser[] = { - {(char *)"implementation", __pyx_getprop_9csimdjson_6Parser_implementation, __pyx_setprop_9csimdjson_6Parser_implementation, (char *)PyDoc_STR("\n The active parser Implementation as (name, description). Can be\n any value from :py:attr:`implementations`. The best Implementation\n for your current platform will be picked by default.\n\n Can be set to the name of any valid Implementation to globally\n change underlying Parser Implementation, such as to disable AVX-512\n if it is causing down-clocking.\n "), 0}, - {0, 0, 0, 0, 0} -}; -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_type_9csimdjson_Parser_slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc_9csimdjson_Parser}, - {Py_tp_doc, (void *)PyDoc_STR("\n A `Parser` instance is used to load and/or parse a JSON document.\n\n A Parser can be reused to parse multiple documents, in which case it wil\n reuse its internal buffer, only increasing it if needed.\n\n :param max_capacity: The maximum size the internal buffer can\n grow to. [default: SIMDJSON_MAXSIZE_BYTES]\n ")}, - {Py_tp_methods, (void *)__pyx_methods_9csimdjson_Parser}, - {Py_tp_getset, (void *)__pyx_getsets_9csimdjson_Parser}, - {Py_tp_new, (void *)__pyx_tp_new_9csimdjson_Parser}, - {0, 0}, -}; -static PyType_Spec __pyx_type_9csimdjson_Parser_spec = { - "csimdjson.Parser", - sizeof(struct __pyx_obj_9csimdjson_Parser), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE, - __pyx_type_9csimdjson_Parser_slots, -}; -#else - -static PyTypeObject __pyx_type_9csimdjson_Parser = { - PyVarObject_HEAD_INIT(0, 0) - "csimdjson.""Parser", /*tp_name*/ - sizeof(struct __pyx_obj_9csimdjson_Parser), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_9csimdjson_Parser, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE, /*tp_flags*/ - PyDoc_STR("\n A `Parser` instance is used to load and/or parse a JSON document.\n\n A Parser can be reused to parse multiple documents, in which case it wil\n reuse its internal buffer, only increasing it if needed.\n\n :param max_capacity: The maximum size the internal buffer can\n grow to. [default: SIMDJSON_MAXSIZE_BYTES]\n "), /*tp_doc*/ - 0, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - __pyx_methods_9csimdjson_Parser, /*tp_methods*/ - 0, /*tp_members*/ - __pyx_getsets_9csimdjson_Parser, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - 0, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_9csimdjson_Parser, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if __PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif - -static struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__ *__pyx_freelist_9csimdjson___pyx_scope_struct____iter__[8]; -static int __pyx_freecount_9csimdjson___pyx_scope_struct____iter__ = 0; - -static PyObject *__pyx_tp_new_9csimdjson___pyx_scope_struct____iter__(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { - struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__ *p; - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - #if CYTHON_COMPILING_IN_CPYTHON - if (likely((int)(__pyx_freecount_9csimdjson___pyx_scope_struct____iter__ > 0) & (int)(t->tp_basicsize == sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__)))) { - o = (PyObject*)__pyx_freelist_9csimdjson___pyx_scope_struct____iter__[--__pyx_freecount_9csimdjson___pyx_scope_struct____iter__]; - memset(o, 0, sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__)); - (void) PyObject_INIT(o, t); - PyObject_GC_Track(o); - } else - #endif - { - o = (*t->tp_alloc)(t, 0); - if (unlikely(!o)) return 0; - } - #endif - p = ((struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__ *)o); - new((void*)&(p->__pyx_v_it)) simdjson::dom::array::iterator(); - return o; -} - -static void __pyx_tp_dealloc_9csimdjson___pyx_scope_struct____iter__(PyObject *o) { - struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__ *p = (struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__ *)o; - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_9csimdjson___pyx_scope_struct____iter__) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - PyObject_GC_UnTrack(o); - __Pyx_call_destructor(p->__pyx_v_it); - Py_CLEAR(p->__pyx_v_self); - #if CYTHON_COMPILING_IN_CPYTHON - if (((int)(__pyx_freecount_9csimdjson___pyx_scope_struct____iter__ < 8) & (int)(Py_TYPE(o)->tp_basicsize == sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__)))) { - __pyx_freelist_9csimdjson___pyx_scope_struct____iter__[__pyx_freecount_9csimdjson___pyx_scope_struct____iter__++] = ((struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__ *)o); - } else - #endif - { - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) tp_free(o); - } - #endif - } -} - -static int __pyx_tp_traverse_9csimdjson___pyx_scope_struct____iter__(PyObject *o, visitproc v, void *a) { - int e; - struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__ *p = (struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__ *)o; - if (p->__pyx_v_self) { - e = (*v)(((PyObject *)p->__pyx_v_self), a); if (e) return e; - } - return 0; -} -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_type_9csimdjson___pyx_scope_struct____iter___slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc_9csimdjson___pyx_scope_struct____iter__}, - {Py_tp_traverse, (void *)__pyx_tp_traverse_9csimdjson___pyx_scope_struct____iter__}, - {Py_tp_new, (void *)__pyx_tp_new_9csimdjson___pyx_scope_struct____iter__}, - {0, 0}, -}; -static PyType_Spec __pyx_type_9csimdjson___pyx_scope_struct____iter___spec = { - "csimdjson.__pyx_scope_struct____iter__", - sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_HAVE_FINALIZE, - __pyx_type_9csimdjson___pyx_scope_struct____iter___slots, -}; -#else - -static PyTypeObject __pyx_type_9csimdjson___pyx_scope_struct____iter__ = { - PyVarObject_HEAD_INIT(0, 0) - "csimdjson.""__pyx_scope_struct____iter__", /*tp_name*/ - sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct____iter__), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_9csimdjson___pyx_scope_struct____iter__, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/ - 0, /*tp_doc*/ - __pyx_tp_traverse_9csimdjson___pyx_scope_struct____iter__, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - 0, /*tp_methods*/ - 0, /*tp_members*/ - 0, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - 0, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_9csimdjson___pyx_scope_struct____iter__, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if __PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif - -static struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__ *__pyx_freelist_9csimdjson___pyx_scope_struct_1___iter__[8]; -static int __pyx_freecount_9csimdjson___pyx_scope_struct_1___iter__ = 0; - -static PyObject *__pyx_tp_new_9csimdjson___pyx_scope_struct_1___iter__(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { - struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__ *p; - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - #if CYTHON_COMPILING_IN_CPYTHON - if (likely((int)(__pyx_freecount_9csimdjson___pyx_scope_struct_1___iter__ > 0) & (int)(t->tp_basicsize == sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__)))) { - o = (PyObject*)__pyx_freelist_9csimdjson___pyx_scope_struct_1___iter__[--__pyx_freecount_9csimdjson___pyx_scope_struct_1___iter__]; - memset(o, 0, sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__)); - (void) PyObject_INIT(o, t); - PyObject_GC_Track(o); - } else - #endif - { - o = (*t->tp_alloc)(t, 0); - if (unlikely(!o)) return 0; - } - #endif - p = ((struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__ *)o); - new((void*)&(p->__pyx_v_it)) simdjson::dom::object::iterator(); - return o; -} - -static void __pyx_tp_dealloc_9csimdjson___pyx_scope_struct_1___iter__(PyObject *o) { - struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__ *p = (struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__ *)o; - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_9csimdjson___pyx_scope_struct_1___iter__) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - PyObject_GC_UnTrack(o); - __Pyx_call_destructor(p->__pyx_v_it); - Py_CLEAR(p->__pyx_v_self); - #if CYTHON_COMPILING_IN_CPYTHON - if (((int)(__pyx_freecount_9csimdjson___pyx_scope_struct_1___iter__ < 8) & (int)(Py_TYPE(o)->tp_basicsize == sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__)))) { - __pyx_freelist_9csimdjson___pyx_scope_struct_1___iter__[__pyx_freecount_9csimdjson___pyx_scope_struct_1___iter__++] = ((struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__ *)o); - } else - #endif - { - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) tp_free(o); - } - #endif - } -} - -static int __pyx_tp_traverse_9csimdjson___pyx_scope_struct_1___iter__(PyObject *o, visitproc v, void *a) { - int e; - struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__ *p = (struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__ *)o; - if (p->__pyx_v_self) { - e = (*v)(((PyObject *)p->__pyx_v_self), a); if (e) return e; - } - return 0; -} -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_type_9csimdjson___pyx_scope_struct_1___iter___slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc_9csimdjson___pyx_scope_struct_1___iter__}, - {Py_tp_traverse, (void *)__pyx_tp_traverse_9csimdjson___pyx_scope_struct_1___iter__}, - {Py_tp_new, (void *)__pyx_tp_new_9csimdjson___pyx_scope_struct_1___iter__}, - {0, 0}, -}; -static PyType_Spec __pyx_type_9csimdjson___pyx_scope_struct_1___iter___spec = { - "csimdjson.__pyx_scope_struct_1___iter__", - sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_HAVE_FINALIZE, - __pyx_type_9csimdjson___pyx_scope_struct_1___iter___slots, -}; -#else - -static PyTypeObject __pyx_type_9csimdjson___pyx_scope_struct_1___iter__ = { - PyVarObject_HEAD_INIT(0, 0) - "csimdjson.""__pyx_scope_struct_1___iter__", /*tp_name*/ - sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_1___iter__), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_9csimdjson___pyx_scope_struct_1___iter__, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/ - 0, /*tp_doc*/ - __pyx_tp_traverse_9csimdjson___pyx_scope_struct_1___iter__, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - 0, /*tp_methods*/ - 0, /*tp_members*/ - 0, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - 0, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_9csimdjson___pyx_scope_struct_1___iter__, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if __PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif - -static struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values *__pyx_freelist_9csimdjson___pyx_scope_struct_2_values[8]; -static int __pyx_freecount_9csimdjson___pyx_scope_struct_2_values = 0; - -static PyObject *__pyx_tp_new_9csimdjson___pyx_scope_struct_2_values(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { - struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values *p; - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - #if CYTHON_COMPILING_IN_CPYTHON - if (likely((int)(__pyx_freecount_9csimdjson___pyx_scope_struct_2_values > 0) & (int)(t->tp_basicsize == sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values)))) { - o = (PyObject*)__pyx_freelist_9csimdjson___pyx_scope_struct_2_values[--__pyx_freecount_9csimdjson___pyx_scope_struct_2_values]; - memset(o, 0, sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values)); - (void) PyObject_INIT(o, t); - PyObject_GC_Track(o); - } else - #endif - { - o = (*t->tp_alloc)(t, 0); - if (unlikely(!o)) return 0; - } - #endif - p = ((struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values *)o); - new((void*)&(p->__pyx_v_it)) simdjson::dom::object::iterator(); - return o; -} - -static void __pyx_tp_dealloc_9csimdjson___pyx_scope_struct_2_values(PyObject *o) { - struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values *p = (struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values *)o; - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_9csimdjson___pyx_scope_struct_2_values) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - PyObject_GC_UnTrack(o); - __Pyx_call_destructor(p->__pyx_v_it); - Py_CLEAR(p->__pyx_v_self); - #if CYTHON_COMPILING_IN_CPYTHON - if (((int)(__pyx_freecount_9csimdjson___pyx_scope_struct_2_values < 8) & (int)(Py_TYPE(o)->tp_basicsize == sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values)))) { - __pyx_freelist_9csimdjson___pyx_scope_struct_2_values[__pyx_freecount_9csimdjson___pyx_scope_struct_2_values++] = ((struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values *)o); - } else - #endif - { - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) tp_free(o); - } - #endif - } -} - -static int __pyx_tp_traverse_9csimdjson___pyx_scope_struct_2_values(PyObject *o, visitproc v, void *a) { - int e; - struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values *p = (struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values *)o; - if (p->__pyx_v_self) { - e = (*v)(((PyObject *)p->__pyx_v_self), a); if (e) return e; - } - return 0; -} -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_type_9csimdjson___pyx_scope_struct_2_values_slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc_9csimdjson___pyx_scope_struct_2_values}, - {Py_tp_traverse, (void *)__pyx_tp_traverse_9csimdjson___pyx_scope_struct_2_values}, - {Py_tp_new, (void *)__pyx_tp_new_9csimdjson___pyx_scope_struct_2_values}, - {0, 0}, -}; -static PyType_Spec __pyx_type_9csimdjson___pyx_scope_struct_2_values_spec = { - "csimdjson.__pyx_scope_struct_2_values", - sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_HAVE_FINALIZE, - __pyx_type_9csimdjson___pyx_scope_struct_2_values_slots, -}; -#else - -static PyTypeObject __pyx_type_9csimdjson___pyx_scope_struct_2_values = { - PyVarObject_HEAD_INIT(0, 0) - "csimdjson.""__pyx_scope_struct_2_values", /*tp_name*/ - sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_2_values), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_9csimdjson___pyx_scope_struct_2_values, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/ - 0, /*tp_doc*/ - __pyx_tp_traverse_9csimdjson___pyx_scope_struct_2_values, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - 0, /*tp_methods*/ - 0, /*tp_members*/ - 0, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - 0, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_9csimdjson___pyx_scope_struct_2_values, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if __PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif - -static struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items *__pyx_freelist_9csimdjson___pyx_scope_struct_3_items[8]; -static int __pyx_freecount_9csimdjson___pyx_scope_struct_3_items = 0; - -static PyObject *__pyx_tp_new_9csimdjson___pyx_scope_struct_3_items(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { - struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items *p; - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - #if CYTHON_COMPILING_IN_CPYTHON - if (likely((int)(__pyx_freecount_9csimdjson___pyx_scope_struct_3_items > 0) & (int)(t->tp_basicsize == sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items)))) { - o = (PyObject*)__pyx_freelist_9csimdjson___pyx_scope_struct_3_items[--__pyx_freecount_9csimdjson___pyx_scope_struct_3_items]; - memset(o, 0, sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items)); - (void) PyObject_INIT(o, t); - PyObject_GC_Track(o); - } else - #endif - { - o = (*t->tp_alloc)(t, 0); - if (unlikely(!o)) return 0; - } - #endif - p = ((struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items *)o); - new((void*)&(p->__pyx_v_it)) simdjson::dom::object::iterator(); - return o; -} - -static void __pyx_tp_dealloc_9csimdjson___pyx_scope_struct_3_items(PyObject *o) { - struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items *p = (struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items *)o; - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_9csimdjson___pyx_scope_struct_3_items) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - PyObject_GC_UnTrack(o); - __Pyx_call_destructor(p->__pyx_v_it); - Py_CLEAR(p->__pyx_v_self); - #if CYTHON_COMPILING_IN_CPYTHON - if (((int)(__pyx_freecount_9csimdjson___pyx_scope_struct_3_items < 8) & (int)(Py_TYPE(o)->tp_basicsize == sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items)))) { - __pyx_freelist_9csimdjson___pyx_scope_struct_3_items[__pyx_freecount_9csimdjson___pyx_scope_struct_3_items++] = ((struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items *)o); - } else - #endif - { - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) tp_free(o); - } - #endif - } -} - -static int __pyx_tp_traverse_9csimdjson___pyx_scope_struct_3_items(PyObject *o, visitproc v, void *a) { - int e; - struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items *p = (struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items *)o; - if (p->__pyx_v_self) { - e = (*v)(((PyObject *)p->__pyx_v_self), a); if (e) return e; - } - return 0; -} -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_type_9csimdjson___pyx_scope_struct_3_items_slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc_9csimdjson___pyx_scope_struct_3_items}, - {Py_tp_traverse, (void *)__pyx_tp_traverse_9csimdjson___pyx_scope_struct_3_items}, - {Py_tp_new, (void *)__pyx_tp_new_9csimdjson___pyx_scope_struct_3_items}, - {0, 0}, -}; -static PyType_Spec __pyx_type_9csimdjson___pyx_scope_struct_3_items_spec = { - "csimdjson.__pyx_scope_struct_3_items", - sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_HAVE_FINALIZE, - __pyx_type_9csimdjson___pyx_scope_struct_3_items_slots, -}; -#else - -static PyTypeObject __pyx_type_9csimdjson___pyx_scope_struct_3_items = { - PyVarObject_HEAD_INIT(0, 0) - "csimdjson.""__pyx_scope_struct_3_items", /*tp_name*/ - sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_3_items), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_9csimdjson___pyx_scope_struct_3_items, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/ - 0, /*tp_doc*/ - __pyx_tp_traverse_9csimdjson___pyx_scope_struct_3_items, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - 0, /*tp_methods*/ - 0, /*tp_members*/ - 0, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - 0, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_9csimdjson___pyx_scope_struct_3_items, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if __PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif - -static struct __pyx_obj_9csimdjson___pyx_scope_struct_4_get_implementations *__pyx_freelist_9csimdjson___pyx_scope_struct_4_get_implementations[8]; -static int __pyx_freecount_9csimdjson___pyx_scope_struct_4_get_implementations = 0; - -static PyObject *__pyx_tp_new_9csimdjson___pyx_scope_struct_4_get_implementations(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - #if CYTHON_COMPILING_IN_CPYTHON - if (likely((int)(__pyx_freecount_9csimdjson___pyx_scope_struct_4_get_implementations > 0) & (int)(t->tp_basicsize == sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_4_get_implementations)))) { - o = (PyObject*)__pyx_freelist_9csimdjson___pyx_scope_struct_4_get_implementations[--__pyx_freecount_9csimdjson___pyx_scope_struct_4_get_implementations]; - memset(o, 0, sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_4_get_implementations)); - (void) PyObject_INIT(o, t); - PyObject_GC_Track(o); - } else - #endif - { - o = (*t->tp_alloc)(t, 0); - if (unlikely(!o)) return 0; - } - #endif - return o; -} - -static void __pyx_tp_dealloc_9csimdjson___pyx_scope_struct_4_get_implementations(PyObject *o) { - struct __pyx_obj_9csimdjson___pyx_scope_struct_4_get_implementations *p = (struct __pyx_obj_9csimdjson___pyx_scope_struct_4_get_implementations *)o; - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_9csimdjson___pyx_scope_struct_4_get_implementations) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - PyObject_GC_UnTrack(o); - Py_CLEAR(p->__pyx_v_self); - Py_CLEAR(p->__pyx_v_supported_by_runtime); - #if CYTHON_COMPILING_IN_CPYTHON - if (((int)(__pyx_freecount_9csimdjson___pyx_scope_struct_4_get_implementations < 8) & (int)(Py_TYPE(o)->tp_basicsize == sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_4_get_implementations)))) { - __pyx_freelist_9csimdjson___pyx_scope_struct_4_get_implementations[__pyx_freecount_9csimdjson___pyx_scope_struct_4_get_implementations++] = ((struct __pyx_obj_9csimdjson___pyx_scope_struct_4_get_implementations *)o); - } else - #endif - { - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) tp_free(o); - } - #endif - } -} - -static int __pyx_tp_traverse_9csimdjson___pyx_scope_struct_4_get_implementations(PyObject *o, visitproc v, void *a) { - int e; - struct __pyx_obj_9csimdjson___pyx_scope_struct_4_get_implementations *p = (struct __pyx_obj_9csimdjson___pyx_scope_struct_4_get_implementations *)o; - if (p->__pyx_v_self) { - e = (*v)(((PyObject *)p->__pyx_v_self), a); if (e) return e; - } - if (p->__pyx_v_supported_by_runtime) { - e = (*v)(p->__pyx_v_supported_by_runtime, a); if (e) return e; - } - return 0; -} -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_type_9csimdjson___pyx_scope_struct_4_get_implementations_slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc_9csimdjson___pyx_scope_struct_4_get_implementations}, - {Py_tp_traverse, (void *)__pyx_tp_traverse_9csimdjson___pyx_scope_struct_4_get_implementations}, - {Py_tp_new, (void *)__pyx_tp_new_9csimdjson___pyx_scope_struct_4_get_implementations}, - {0, 0}, -}; -static PyType_Spec __pyx_type_9csimdjson___pyx_scope_struct_4_get_implementations_spec = { - "csimdjson.__pyx_scope_struct_4_get_implementations", - sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_4_get_implementations), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_HAVE_FINALIZE, - __pyx_type_9csimdjson___pyx_scope_struct_4_get_implementations_slots, -}; -#else - -static PyTypeObject __pyx_type_9csimdjson___pyx_scope_struct_4_get_implementations = { - PyVarObject_HEAD_INIT(0, 0) - "csimdjson.""__pyx_scope_struct_4_get_implementations", /*tp_name*/ - sizeof(struct __pyx_obj_9csimdjson___pyx_scope_struct_4_get_implementations), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_9csimdjson___pyx_scope_struct_4_get_implementations, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/ - 0, /*tp_doc*/ - __pyx_tp_traverse_9csimdjson___pyx_scope_struct_4_get_implementations, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - 0, /*tp_methods*/ - 0, /*tp_members*/ - 0, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - 0, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_9csimdjson___pyx_scope_struct_4_get_implementations, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if __PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif -static struct __pyx_vtabstruct_array __pyx_vtable_array; - -static PyObject *__pyx_tp_new_array(PyTypeObject *t, PyObject *a, PyObject *k) { - struct __pyx_array_obj *p; - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - if (likely(!__Pyx_PyType_HasFeature(t, Py_TPFLAGS_IS_ABSTRACT))) { - o = (*t->tp_alloc)(t, 0); - } else { - o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0); - } - if (unlikely(!o)) return 0; - #endif - p = ((struct __pyx_array_obj *)o); - p->__pyx_vtab = __pyx_vtabptr_array; - p->mode = ((PyObject*)Py_None); Py_INCREF(Py_None); - p->_format = ((PyObject*)Py_None); Py_INCREF(Py_None); - if (unlikely(__pyx_array___cinit__(o, a, k) < 0)) goto bad; - return o; - bad: - Py_DECREF(o); o = 0; - return NULL; -} - -static void __pyx_tp_dealloc_array(PyObject *o) { - struct __pyx_array_obj *p = (struct __pyx_array_obj *)o; - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && (!PyType_IS_GC(Py_TYPE(o)) || !__Pyx_PyObject_GC_IsFinalized(o))) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_array) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - { - PyObject *etype, *eval, *etb; - PyErr_Fetch(&etype, &eval, &etb); - __Pyx_SET_REFCNT(o, Py_REFCNT(o) + 1); - __pyx_array___dealloc__(o); - __Pyx_SET_REFCNT(o, Py_REFCNT(o) - 1); - PyErr_Restore(etype, eval, etb); - } - Py_CLEAR(p->mode); - Py_CLEAR(p->_format); - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) tp_free(o); - } - #endif -} -static PyObject *__pyx_sq_item_array(PyObject *o, Py_ssize_t i) { - PyObject *r; - PyObject *x = PyInt_FromSsize_t(i); if(!x) return 0; - r = Py_TYPE(o)->tp_as_mapping->mp_subscript(o, x); - Py_DECREF(x); - return r; -} - -static int __pyx_mp_ass_subscript_array(PyObject *o, PyObject *i, PyObject *v) { - if (v) { - return __pyx_array___setitem__(o, i, v); - } - else { - __Pyx_TypeName o_type_name; - o_type_name = __Pyx_PyType_GetName(Py_TYPE(o)); - PyErr_Format(PyExc_NotImplementedError, - "Subscript deletion not supported by " __Pyx_FMT_TYPENAME, o_type_name); - __Pyx_DECREF_TypeName(o_type_name); - return -1; - } -} - -static PyObject *__pyx_tp_getattro_array(PyObject *o, PyObject *n) { - PyObject *v = __Pyx_PyObject_GenericGetAttr(o, n); - if (!v && PyErr_ExceptionMatches(PyExc_AttributeError)) { - PyErr_Clear(); - v = __pyx_array___getattr__(o, n); - } - return v; -} - -static PyObject *__pyx_getprop___pyx_array_memview(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_15View_dot_MemoryView_5array_7memview_1__get__(o); -} - -static PyMethodDef __pyx_methods_array[] = { - {"__getattr__", (PyCFunction)__pyx_array___getattr__, METH_O|METH_COEXIST, 0}, - {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw___pyx_array_1__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, - {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw___pyx_array_3__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, - {0, 0, 0, 0} -}; - -static struct PyGetSetDef __pyx_getsets_array[] = { - {(char *)"memview", __pyx_getprop___pyx_array_memview, 0, (char *)0, 0}, - {0, 0, 0, 0, 0} -}; -#if CYTHON_USE_TYPE_SPECS -#if !CYTHON_COMPILING_IN_LIMITED_API - -static PyBufferProcs __pyx_tp_as_buffer_array = { - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getreadbuffer*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getwritebuffer*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getsegcount*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getcharbuffer*/ - #endif - __pyx_array_getbuffer, /*bf_getbuffer*/ - 0, /*bf_releasebuffer*/ -}; -#endif -static PyType_Slot __pyx_type___pyx_array_slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc_array}, - {Py_sq_length, (void *)__pyx_array___len__}, - {Py_sq_item, (void *)__pyx_sq_item_array}, - {Py_mp_length, (void *)__pyx_array___len__}, - {Py_mp_subscript, (void *)__pyx_array___getitem__}, - {Py_mp_ass_subscript, (void *)__pyx_mp_ass_subscript_array}, - {Py_tp_getattro, (void *)__pyx_tp_getattro_array}, - #if defined(Py_bf_getbuffer) - {Py_bf_getbuffer, (void *)__pyx_array_getbuffer}, - #endif - {Py_tp_methods, (void *)__pyx_methods_array}, - {Py_tp_getset, (void *)__pyx_getsets_array}, - {Py_tp_new, (void *)__pyx_tp_new_array}, - {0, 0}, -}; -static PyType_Spec __pyx_type___pyx_array_spec = { - "csimdjson.array", - sizeof(struct __pyx_array_obj), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_SEQUENCE, - __pyx_type___pyx_array_slots, -}; -#else - -static PySequenceMethods __pyx_tp_as_sequence_array = { - __pyx_array___len__, /*sq_length*/ - 0, /*sq_concat*/ - 0, /*sq_repeat*/ - __pyx_sq_item_array, /*sq_item*/ - 0, /*sq_slice*/ - 0, /*sq_ass_item*/ - 0, /*sq_ass_slice*/ - 0, /*sq_contains*/ - 0, /*sq_inplace_concat*/ - 0, /*sq_inplace_repeat*/ -}; - -static PyMappingMethods __pyx_tp_as_mapping_array = { - __pyx_array___len__, /*mp_length*/ - __pyx_array___getitem__, /*mp_subscript*/ - __pyx_mp_ass_subscript_array, /*mp_ass_subscript*/ -}; - -static PyBufferProcs __pyx_tp_as_buffer_array = { - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getreadbuffer*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getwritebuffer*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getsegcount*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getcharbuffer*/ - #endif - __pyx_array_getbuffer, /*bf_getbuffer*/ - 0, /*bf_releasebuffer*/ -}; - -static PyTypeObject __pyx_type___pyx_array = { - PyVarObject_HEAD_INIT(0, 0) - "csimdjson.""array", /*tp_name*/ - sizeof(struct __pyx_array_obj), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_array, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - &__pyx_tp_as_sequence_array, /*tp_as_sequence*/ - &__pyx_tp_as_mapping_array, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - __pyx_tp_getattro_array, /*tp_getattro*/ - 0, /*tp_setattro*/ - &__pyx_tp_as_buffer_array, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_SEQUENCE, /*tp_flags*/ - 0, /*tp_doc*/ - 0, /*tp_traverse*/ - 0, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - __pyx_methods_array, /*tp_methods*/ - 0, /*tp_members*/ - __pyx_getsets_array, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - 0, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_array, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if __PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif - -static PyObject *__pyx_tp_new_Enum(PyTypeObject *t, CYTHON_UNUSED PyObject *a, CYTHON_UNUSED PyObject *k) { - struct __pyx_MemviewEnum_obj *p; - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - if (likely(!__Pyx_PyType_HasFeature(t, Py_TPFLAGS_IS_ABSTRACT))) { - o = (*t->tp_alloc)(t, 0); - } else { - o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0); - } - if (unlikely(!o)) return 0; - #endif - p = ((struct __pyx_MemviewEnum_obj *)o); - p->name = Py_None; Py_INCREF(Py_None); - return o; -} - -static void __pyx_tp_dealloc_Enum(PyObject *o) { - struct __pyx_MemviewEnum_obj *p = (struct __pyx_MemviewEnum_obj *)o; - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_Enum) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - PyObject_GC_UnTrack(o); - Py_CLEAR(p->name); - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) tp_free(o); - } - #endif -} - -static int __pyx_tp_traverse_Enum(PyObject *o, visitproc v, void *a) { - int e; - struct __pyx_MemviewEnum_obj *p = (struct __pyx_MemviewEnum_obj *)o; - if (p->name) { - e = (*v)(p->name, a); if (e) return e; - } - return 0; -} - -static int __pyx_tp_clear_Enum(PyObject *o) { - PyObject* tmp; - struct __pyx_MemviewEnum_obj *p = (struct __pyx_MemviewEnum_obj *)o; - tmp = ((PyObject*)p->name); - p->name = Py_None; Py_INCREF(Py_None); - Py_XDECREF(tmp); - return 0; -} - -static PyObject *__pyx_specialmethod___pyx_MemviewEnum___repr__(PyObject *self, CYTHON_UNUSED PyObject *arg) { - return __pyx_MemviewEnum___repr__(self); -} - -static PyMethodDef __pyx_methods_Enum[] = { - {"__repr__", (PyCFunction)__pyx_specialmethod___pyx_MemviewEnum___repr__, METH_NOARGS|METH_COEXIST, 0}, - {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw___pyx_MemviewEnum_1__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, - {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw___pyx_MemviewEnum_3__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, - {0, 0, 0, 0} -}; -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_type___pyx_MemviewEnum_slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc_Enum}, - {Py_tp_repr, (void *)__pyx_MemviewEnum___repr__}, - {Py_tp_traverse, (void *)__pyx_tp_traverse_Enum}, - {Py_tp_clear, (void *)__pyx_tp_clear_Enum}, - {Py_tp_methods, (void *)__pyx_methods_Enum}, - {Py_tp_init, (void *)__pyx_MemviewEnum___init__}, - {Py_tp_new, (void *)__pyx_tp_new_Enum}, - {0, 0}, -}; -static PyType_Spec __pyx_type___pyx_MemviewEnum_spec = { - "csimdjson.Enum", - sizeof(struct __pyx_MemviewEnum_obj), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, - __pyx_type___pyx_MemviewEnum_slots, -}; -#else - -static PyTypeObject __pyx_type___pyx_MemviewEnum = { - PyVarObject_HEAD_INIT(0, 0) - "csimdjson.""Enum", /*tp_name*/ - sizeof(struct __pyx_MemviewEnum_obj), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_Enum, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - __pyx_MemviewEnum___repr__, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ - 0, /*tp_doc*/ - __pyx_tp_traverse_Enum, /*tp_traverse*/ - __pyx_tp_clear_Enum, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - __pyx_methods_Enum, /*tp_methods*/ - 0, /*tp_members*/ - 0, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - __pyx_MemviewEnum___init__, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_Enum, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if __PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif -static struct __pyx_vtabstruct_memoryview __pyx_vtable_memoryview; - -static PyObject *__pyx_tp_new_memoryview(PyTypeObject *t, PyObject *a, PyObject *k) { - struct __pyx_memoryview_obj *p; - PyObject *o; - #if CYTHON_COMPILING_IN_LIMITED_API - allocfunc alloc_func = (allocfunc)PyType_GetSlot(t, Py_tp_alloc); - o = alloc_func(t, 0); - #else - if (likely(!__Pyx_PyType_HasFeature(t, Py_TPFLAGS_IS_ABSTRACT))) { - o = (*t->tp_alloc)(t, 0); - } else { - o = (PyObject *) PyBaseObject_Type.tp_new(t, __pyx_empty_tuple, 0); - } - if (unlikely(!o)) return 0; - #endif - p = ((struct __pyx_memoryview_obj *)o); - p->__pyx_vtab = __pyx_vtabptr_memoryview; - p->obj = Py_None; Py_INCREF(Py_None); - p->_size = Py_None; Py_INCREF(Py_None); - p->_array_interface = Py_None; Py_INCREF(Py_None); - p->view.obj = NULL; - if (unlikely(__pyx_memoryview___cinit__(o, a, k) < 0)) goto bad; - return o; - bad: - Py_DECREF(o); o = 0; - return NULL; -} - -static void __pyx_tp_dealloc_memoryview(PyObject *o) { - struct __pyx_memoryview_obj *p = (struct __pyx_memoryview_obj *)o; - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc_memoryview) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - PyObject_GC_UnTrack(o); - { - PyObject *etype, *eval, *etb; - PyErr_Fetch(&etype, &eval, &etb); - __Pyx_SET_REFCNT(o, Py_REFCNT(o) + 1); - __pyx_memoryview___dealloc__(o); - __Pyx_SET_REFCNT(o, Py_REFCNT(o) - 1); - PyErr_Restore(etype, eval, etb); - } - Py_CLEAR(p->obj); - Py_CLEAR(p->_size); - Py_CLEAR(p->_array_interface); - #if CYTHON_USE_TYPE_SLOTS || CYTHON_COMPILING_IN_PYPY - (*Py_TYPE(o)->tp_free)(o); - #else - { - freefunc tp_free = (freefunc)PyType_GetSlot(Py_TYPE(o), Py_tp_free); - if (tp_free) tp_free(o); - } - #endif -} - -static int __pyx_tp_traverse_memoryview(PyObject *o, visitproc v, void *a) { - int e; - struct __pyx_memoryview_obj *p = (struct __pyx_memoryview_obj *)o; - if (p->obj) { - e = (*v)(p->obj, a); if (e) return e; - } - if (p->_size) { - e = (*v)(p->_size, a); if (e) return e; - } - if (p->_array_interface) { - e = (*v)(p->_array_interface, a); if (e) return e; - } - if (p->view.obj) { - e = (*v)(p->view.obj, a); if (e) return e; - } - return 0; -} - -static int __pyx_tp_clear_memoryview(PyObject *o) { - PyObject* tmp; - struct __pyx_memoryview_obj *p = (struct __pyx_memoryview_obj *)o; - tmp = ((PyObject*)p->obj); - p->obj = Py_None; Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->_size); - p->_size = Py_None; Py_INCREF(Py_None); - Py_XDECREF(tmp); - tmp = ((PyObject*)p->_array_interface); - p->_array_interface = Py_None; Py_INCREF(Py_None); - Py_XDECREF(tmp); - Py_CLEAR(p->view.obj); - return 0; -} -static PyObject *__pyx_sq_item_memoryview(PyObject *o, Py_ssize_t i) { - PyObject *r; - PyObject *x = PyInt_FromSsize_t(i); if(!x) return 0; - r = Py_TYPE(o)->tp_as_mapping->mp_subscript(o, x); - Py_DECREF(x); - return r; -} - -static int __pyx_mp_ass_subscript_memoryview(PyObject *o, PyObject *i, PyObject *v) { - if (v) { - return __pyx_memoryview___setitem__(o, i, v); - } - else { - __Pyx_TypeName o_type_name; - o_type_name = __Pyx_PyType_GetName(Py_TYPE(o)); - PyErr_Format(PyExc_NotImplementedError, - "Subscript deletion not supported by " __Pyx_FMT_TYPENAME, o_type_name); - __Pyx_DECREF_TypeName(o_type_name); - return -1; - } -} - -static PyObject *__pyx_getprop___pyx_memoryview_T(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_15View_dot_MemoryView_10memoryview_1T_1__get__(o); -} - -static PyObject *__pyx_getprop___pyx_memoryview_base(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_15View_dot_MemoryView_10memoryview_4base_1__get__(o); -} - -static PyObject *__pyx_getprop___pyx_memoryview_shape(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_15View_dot_MemoryView_10memoryview_5shape_1__get__(o); -} - -static PyObject *__pyx_getprop___pyx_memoryview_strides(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_15View_dot_MemoryView_10memoryview_7strides_1__get__(o); -} - -static PyObject *__pyx_getprop___pyx_memoryview_suboffsets(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_15View_dot_MemoryView_10memoryview_10suboffsets_1__get__(o); -} - -static PyObject *__pyx_getprop___pyx_memoryview_ndim(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_15View_dot_MemoryView_10memoryview_4ndim_1__get__(o); -} - -static PyObject *__pyx_getprop___pyx_memoryview_itemsize(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_15View_dot_MemoryView_10memoryview_8itemsize_1__get__(o); -} - -static PyObject *__pyx_getprop___pyx_memoryview_nbytes(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_15View_dot_MemoryView_10memoryview_6nbytes_1__get__(o); -} - -static PyObject *__pyx_getprop___pyx_memoryview_size(PyObject *o, CYTHON_UNUSED void *x) { - return __pyx_pw_15View_dot_MemoryView_10memoryview_4size_1__get__(o); -} - -static PyObject *__pyx_specialmethod___pyx_memoryview___repr__(PyObject *self, CYTHON_UNUSED PyObject *arg) { - return __pyx_memoryview___repr__(self); -} - -static PyMethodDef __pyx_methods_memoryview[] = { - {"__repr__", (PyCFunction)__pyx_specialmethod___pyx_memoryview___repr__, METH_NOARGS|METH_COEXIST, 0}, - {"is_c_contig", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_memoryview_is_c_contig, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, - {"is_f_contig", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_memoryview_is_f_contig, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, - {"copy", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_memoryview_copy, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, - {"copy_fortran", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_memoryview_copy_fortran, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, - {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw___pyx_memoryview_1__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, - {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw___pyx_memoryview_3__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, - {0, 0, 0, 0} -}; - -static struct PyGetSetDef __pyx_getsets_memoryview[] = { - {(char *)"T", __pyx_getprop___pyx_memoryview_T, 0, (char *)0, 0}, - {(char *)"base", __pyx_getprop___pyx_memoryview_base, 0, (char *)0, 0}, - {(char *)"shape", __pyx_getprop___pyx_memoryview_shape, 0, (char *)0, 0}, - {(char *)"strides", __pyx_getprop___pyx_memoryview_strides, 0, (char *)0, 0}, - {(char *)"suboffsets", __pyx_getprop___pyx_memoryview_suboffsets, 0, (char *)0, 0}, - {(char *)"ndim", __pyx_getprop___pyx_memoryview_ndim, 0, (char *)0, 0}, - {(char *)"itemsize", __pyx_getprop___pyx_memoryview_itemsize, 0, (char *)0, 0}, - {(char *)"nbytes", __pyx_getprop___pyx_memoryview_nbytes, 0, (char *)0, 0}, - {(char *)"size", __pyx_getprop___pyx_memoryview_size, 0, (char *)0, 0}, - {0, 0, 0, 0, 0} -}; -#if CYTHON_USE_TYPE_SPECS -#if !CYTHON_COMPILING_IN_LIMITED_API - -static PyBufferProcs __pyx_tp_as_buffer_memoryview = { - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getreadbuffer*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getwritebuffer*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getsegcount*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getcharbuffer*/ - #endif - __pyx_memoryview_getbuffer, /*bf_getbuffer*/ - 0, /*bf_releasebuffer*/ -}; -#endif -static PyType_Slot __pyx_type___pyx_memoryview_slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc_memoryview}, - {Py_tp_repr, (void *)__pyx_memoryview___repr__}, - {Py_sq_length, (void *)__pyx_memoryview___len__}, - {Py_sq_item, (void *)__pyx_sq_item_memoryview}, - {Py_mp_length, (void *)__pyx_memoryview___len__}, - {Py_mp_subscript, (void *)__pyx_memoryview___getitem__}, - {Py_mp_ass_subscript, (void *)__pyx_mp_ass_subscript_memoryview}, - {Py_tp_str, (void *)__pyx_memoryview___str__}, - #if defined(Py_bf_getbuffer) - {Py_bf_getbuffer, (void *)__pyx_memoryview_getbuffer}, - #endif - {Py_tp_traverse, (void *)__pyx_tp_traverse_memoryview}, - {Py_tp_clear, (void *)__pyx_tp_clear_memoryview}, - {Py_tp_methods, (void *)__pyx_methods_memoryview}, - {Py_tp_getset, (void *)__pyx_getsets_memoryview}, - {Py_tp_new, (void *)__pyx_tp_new_memoryview}, - {0, 0}, -}; -static PyType_Spec __pyx_type___pyx_memoryview_spec = { - "csimdjson.memoryview", - sizeof(struct __pyx_memoryview_obj), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, - __pyx_type___pyx_memoryview_slots, -}; -#else - -static PySequenceMethods __pyx_tp_as_sequence_memoryview = { - __pyx_memoryview___len__, /*sq_length*/ - 0, /*sq_concat*/ - 0, /*sq_repeat*/ - __pyx_sq_item_memoryview, /*sq_item*/ - 0, /*sq_slice*/ - 0, /*sq_ass_item*/ - 0, /*sq_ass_slice*/ - 0, /*sq_contains*/ - 0, /*sq_inplace_concat*/ - 0, /*sq_inplace_repeat*/ -}; - -static PyMappingMethods __pyx_tp_as_mapping_memoryview = { - __pyx_memoryview___len__, /*mp_length*/ - __pyx_memoryview___getitem__, /*mp_subscript*/ - __pyx_mp_ass_subscript_memoryview, /*mp_ass_subscript*/ -}; - -static PyBufferProcs __pyx_tp_as_buffer_memoryview = { - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getreadbuffer*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getwritebuffer*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getsegcount*/ - #endif - #if PY_MAJOR_VERSION < 3 - 0, /*bf_getcharbuffer*/ - #endif - __pyx_memoryview_getbuffer, /*bf_getbuffer*/ - 0, /*bf_releasebuffer*/ -}; - -static PyTypeObject __pyx_type___pyx_memoryview = { - PyVarObject_HEAD_INIT(0, 0) - "csimdjson.""memoryview", /*tp_name*/ - sizeof(struct __pyx_memoryview_obj), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc_memoryview, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - __pyx_memoryview___repr__, /*tp_repr*/ - 0, /*tp_as_number*/ - &__pyx_tp_as_sequence_memoryview, /*tp_as_sequence*/ - &__pyx_tp_as_mapping_memoryview, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - __pyx_memoryview___str__, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - &__pyx_tp_as_buffer_memoryview, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/ - 0, /*tp_doc*/ - __pyx_tp_traverse_memoryview, /*tp_traverse*/ - __pyx_tp_clear_memoryview, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - __pyx_methods_memoryview, /*tp_methods*/ - 0, /*tp_members*/ - __pyx_getsets_memoryview, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - 0, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new_memoryview, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if __PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif -static struct __pyx_vtabstruct__memoryviewslice __pyx_vtable__memoryviewslice; - -static PyObject *__pyx_tp_new__memoryviewslice(PyTypeObject *t, PyObject *a, PyObject *k) { - struct __pyx_memoryviewslice_obj *p; - PyObject *o = __pyx_tp_new_memoryview(t, a, k); - if (unlikely(!o)) return 0; - p = ((struct __pyx_memoryviewslice_obj *)o); - p->__pyx_base.__pyx_vtab = (struct __pyx_vtabstruct_memoryview*)__pyx_vtabptr__memoryviewslice; - new((void*)&(p->from_slice)) __Pyx_memviewslice(); - p->from_object = Py_None; Py_INCREF(Py_None); - p->from_slice.memview = NULL; - return o; -} - -static void __pyx_tp_dealloc__memoryviewslice(PyObject *o) { - struct __pyx_memoryviewslice_obj *p = (struct __pyx_memoryviewslice_obj *)o; - #if CYTHON_USE_TP_FINALIZE - if (unlikely((PY_VERSION_HEX >= 0x03080000 || __Pyx_PyType_HasFeature(Py_TYPE(o), Py_TPFLAGS_HAVE_FINALIZE)) && __Pyx_PyObject_GetSlot(o, tp_finalize, destructor)) && !__Pyx_PyObject_GC_IsFinalized(o)) { - if (__Pyx_PyObject_GetSlot(o, tp_dealloc, destructor) == __pyx_tp_dealloc__memoryviewslice) { - if (PyObject_CallFinalizerFromDealloc(o)) return; - } - } - #endif - PyObject_GC_UnTrack(o); - { - PyObject *etype, *eval, *etb; - PyErr_Fetch(&etype, &eval, &etb); - __Pyx_SET_REFCNT(o, Py_REFCNT(o) + 1); - __pyx_memoryviewslice___dealloc__(o); - __Pyx_SET_REFCNT(o, Py_REFCNT(o) - 1); - PyErr_Restore(etype, eval, etb); - } - __Pyx_call_destructor(p->from_slice); - Py_CLEAR(p->from_object); - PyObject_GC_Track(o); - __pyx_tp_dealloc_memoryview(o); -} - -static int __pyx_tp_traverse__memoryviewslice(PyObject *o, visitproc v, void *a) { - int e; - struct __pyx_memoryviewslice_obj *p = (struct __pyx_memoryviewslice_obj *)o; - e = __pyx_tp_traverse_memoryview(o, v, a); if (e) return e; - if (p->from_object) { - e = (*v)(p->from_object, a); if (e) return e; - } - return 0; -} - -static int __pyx_tp_clear__memoryviewslice(PyObject *o) { - PyObject* tmp; - struct __pyx_memoryviewslice_obj *p = (struct __pyx_memoryviewslice_obj *)o; - __pyx_tp_clear_memoryview(o); - tmp = ((PyObject*)p->from_object); - p->from_object = Py_None; Py_INCREF(Py_None); - Py_XDECREF(tmp); - __PYX_XCLEAR_MEMVIEW(&p->from_slice, 1); - return 0; -} - -static PyMethodDef __pyx_methods__memoryviewslice[] = { - {"__reduce_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw___pyx_memoryviewslice_1__reduce_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, - {"__setstate_cython__", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw___pyx_memoryviewslice_3__setstate_cython__, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}, - {0, 0, 0, 0} -}; -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_type___pyx_memoryviewslice_slots[] = { - {Py_tp_dealloc, (void *)__pyx_tp_dealloc__memoryviewslice}, - {Py_tp_doc, (void *)PyDoc_STR("Internal class for passing memoryview slices to Python")}, - {Py_tp_traverse, (void *)__pyx_tp_traverse__memoryviewslice}, - {Py_tp_clear, (void *)__pyx_tp_clear__memoryviewslice}, - {Py_tp_methods, (void *)__pyx_methods__memoryviewslice}, - {Py_tp_new, (void *)__pyx_tp_new__memoryviewslice}, - {0, 0}, -}; -static PyType_Spec __pyx_type___pyx_memoryviewslice_spec = { - "csimdjson._memoryviewslice", - sizeof(struct __pyx_memoryviewslice_obj), - 0, - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_SEQUENCE, - __pyx_type___pyx_memoryviewslice_slots, -}; -#else - -static PyTypeObject __pyx_type___pyx_memoryviewslice = { - PyVarObject_HEAD_INIT(0, 0) - "csimdjson.""_memoryviewslice", /*tp_name*/ - sizeof(struct __pyx_memoryviewslice_obj), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - __pyx_tp_dealloc__memoryviewslice, /*tp_dealloc*/ - #if PY_VERSION_HEX < 0x030800b4 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030800b4 - 0, /*tp_vectorcall_offset*/ - #endif - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - #if PY_MAJOR_VERSION < 3 - 0, /*tp_compare*/ - #endif - #if PY_MAJOR_VERSION >= 3 - 0, /*tp_as_async*/ - #endif - #if CYTHON_COMPILING_IN_PYPY || 0 - __pyx_memoryview___repr__, /*tp_repr*/ - #else - 0, /*tp_repr*/ - #endif - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash*/ - 0, /*tp_call*/ - #if CYTHON_COMPILING_IN_PYPY || 0 - __pyx_memoryview___str__, /*tp_str*/ - #else - 0, /*tp_str*/ - #endif - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT|Py_TPFLAGS_HAVE_VERSION_TAG|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC|Py_TPFLAGS_SEQUENCE, /*tp_flags*/ - PyDoc_STR("Internal class for passing memoryview slices to Python"), /*tp_doc*/ - __pyx_tp_traverse__memoryviewslice, /*tp_traverse*/ - __pyx_tp_clear__memoryviewslice, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - 0, /*tp_iter*/ - 0, /*tp_iternext*/ - __pyx_methods__memoryviewslice, /*tp_methods*/ - 0, /*tp_members*/ - 0, /*tp_getset*/ - 0, /*tp_base*/ - 0, /*tp_dict*/ - 0, /*tp_descr_get*/ - 0, /*tp_descr_set*/ - #if !CYTHON_USE_TYPE_SPECS - 0, /*tp_dictoffset*/ - #endif - 0, /*tp_init*/ - 0, /*tp_alloc*/ - __pyx_tp_new__memoryviewslice, /*tp_new*/ - 0, /*tp_free*/ - 0, /*tp_is_gc*/ - 0, /*tp_bases*/ - 0, /*tp_mro*/ - 0, /*tp_cache*/ - 0, /*tp_subclasses*/ - 0, /*tp_weaklist*/ - 0, /*tp_del*/ - 0, /*tp_version_tag*/ - #if PY_VERSION_HEX >= 0x030400a1 - #if CYTHON_USE_TP_FINALIZE - 0, /*tp_finalize*/ - #else - NULL, /*tp_finalize*/ - #endif - #endif - #if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, /*tp_vectorcall*/ - #endif - #if __PYX_NEED_TP_PRINT_SLOT == 1 - 0, /*tp_print*/ - #endif - #if PY_VERSION_HEX >= 0x030C0000 - 0, /*tp_watched*/ - #endif - #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, /*tp_pypy_flags*/ - #endif -}; -#endif - -static PyMethodDef __pyx_methods[] = { - {0, 0, 0, 0} -}; -#ifndef CYTHON_SMALL_CODE -#if defined(__clang__) - #define CYTHON_SMALL_CODE -#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) - #define CYTHON_SMALL_CODE __attribute__((cold)) -#else - #define CYTHON_SMALL_CODE -#endif -#endif -/* #### Code section: pystring_table ### */ - -static int __Pyx_CreateStringTabAndInitStrings(void) { - __Pyx_StringTabEntry __pyx_string_tab[] = { - {&__pyx_kp_u_, __pyx_k_, sizeof(__pyx_k_), 0, 1, 0, 0}, - {&__pyx_n_s_ASCII, __pyx_k_ASCII, sizeof(__pyx_k_ASCII), 0, 0, 1, 1}, - {&__pyx_kp_s_All_dimensions_preceding_dimensi, __pyx_k_All_dimensions_preceding_dimensi, sizeof(__pyx_k_All_dimensions_preceding_dimensi), 0, 0, 1, 0}, - {&__pyx_n_s_Array, __pyx_k_Array, sizeof(__pyx_k_Array), 0, 0, 1, 1}, - {&__pyx_n_s_ArrayBuffer, __pyx_k_ArrayBuffer, sizeof(__pyx_k_ArrayBuffer), 0, 0, 1, 1}, - {&__pyx_n_s_ArrayBuffer___reduce_cython, __pyx_k_ArrayBuffer___reduce_cython, sizeof(__pyx_k_ArrayBuffer___reduce_cython), 0, 0, 1, 1}, - {&__pyx_n_s_ArrayBuffer___setstate_cython, __pyx_k_ArrayBuffer___setstate_cython, sizeof(__pyx_k_ArrayBuffer___setstate_cython), 0, 0, 1, 1}, - {&__pyx_n_s_Array___iter, __pyx_k_Array___iter, sizeof(__pyx_k_Array___iter), 0, 0, 1, 1}, - {&__pyx_n_s_Array___reduce_cython, __pyx_k_Array___reduce_cython, sizeof(__pyx_k_Array___reduce_cython), 0, 0, 1, 1}, - {&__pyx_n_s_Array___setstate_cython, __pyx_k_Array___setstate_cython, sizeof(__pyx_k_Array___setstate_cython), 0, 0, 1, 1}, - {&__pyx_n_s_Array_as_buffer, __pyx_k_Array_as_buffer, sizeof(__pyx_k_Array_as_buffer), 0, 0, 1, 1}, - {&__pyx_n_s_Array_as_list, __pyx_k_Array_as_list, sizeof(__pyx_k_Array_as_list), 0, 0, 1, 1}, - {&__pyx_n_s_Array_at_pointer, __pyx_k_Array_at_pointer, sizeof(__pyx_k_Array_at_pointer), 0, 0, 1, 1}, - {&__pyx_n_s_AssertionError, __pyx_k_AssertionError, sizeof(__pyx_k_AssertionError), 0, 0, 1, 1}, - {&__pyx_kp_u_Attempted_to_set_a_runtime_Imple, __pyx_k_Attempted_to_set_a_runtime_Imple, sizeof(__pyx_k_Attempted_to_set_a_runtime_Imple), 0, 1, 0, 0}, - {&__pyx_kp_s_Buffer_view_does_not_expose_stri, __pyx_k_Buffer_view_does_not_expose_stri, sizeof(__pyx_k_Buffer_view_does_not_expose_stri), 0, 0, 1, 0}, - {&__pyx_kp_s_Can_only_create_a_buffer_that_is, __pyx_k_Can_only_create_a_buffer_that_is, sizeof(__pyx_k_Can_only_create_a_buffer_that_is), 0, 0, 1, 0}, - {&__pyx_kp_s_Cannot_assign_to_read_only_memor, __pyx_k_Cannot_assign_to_read_only_memor, sizeof(__pyx_k_Cannot_assign_to_read_only_memor), 0, 0, 1, 0}, - {&__pyx_kp_s_Cannot_create_writable_memory_vi, __pyx_k_Cannot_create_writable_memory_vi, sizeof(__pyx_k_Cannot_create_writable_memory_vi), 0, 0, 1, 0}, - {&__pyx_kp_u_Cannot_index_with_type, __pyx_k_Cannot_index_with_type, sizeof(__pyx_k_Cannot_index_with_type), 0, 1, 0, 0}, - {&__pyx_kp_s_Cannot_transpose_memoryview_with, __pyx_k_Cannot_transpose_memoryview_with, sizeof(__pyx_k_Cannot_transpose_memoryview_with), 0, 0, 1, 0}, - {&__pyx_kp_s_Dimension_d_is_not_direct, __pyx_k_Dimension_d_is_not_direct, sizeof(__pyx_k_Dimension_d_is_not_direct), 0, 0, 1, 0}, - {&__pyx_kp_u_EMPTY_no_JSON_found, __pyx_k_EMPTY_no_JSON_found, sizeof(__pyx_k_EMPTY_no_JSON_found), 0, 1, 0, 0}, - {&__pyx_n_s_Ellipsis, __pyx_k_Ellipsis, sizeof(__pyx_k_Ellipsis), 0, 0, 1, 1}, - {&__pyx_kp_s_Empty_shape_tuple_for_cython_arr, __pyx_k_Empty_shape_tuple_for_cython_arr, sizeof(__pyx_k_Empty_shape_tuple_for_cython_arr), 0, 0, 1, 0}, - {&__pyx_kp_u_Encountered_an_unknown_element_t, __pyx_k_Encountered_an_unknown_element_t, sizeof(__pyx_k_Encountered_an_unknown_element_t), 0, 1, 0, 0}, - {&__pyx_kp_s_Incompatible_checksums_0x_x_vs_0, __pyx_k_Incompatible_checksums_0x_x_vs_0, sizeof(__pyx_k_Incompatible_checksums_0x_x_vs_0), 0, 0, 1, 0}, - {&__pyx_n_s_IndexError, __pyx_k_IndexError, sizeof(__pyx_k_IndexError), 0, 0, 1, 1}, - {&__pyx_kp_s_Index_out_of_bounds_axis_d, __pyx_k_Index_out_of_bounds_axis_d, sizeof(__pyx_k_Index_out_of_bounds_axis_d), 0, 0, 1, 0}, - {&__pyx_kp_s_Indirect_dimensions_not_supporte, __pyx_k_Indirect_dimensions_not_supporte, sizeof(__pyx_k_Indirect_dimensions_not_supporte), 0, 0, 1, 0}, - {&__pyx_kp_u_Invalid_mode_expected_c_or_fortr, __pyx_k_Invalid_mode_expected_c_or_fortr, sizeof(__pyx_k_Invalid_mode_expected_c_or_fortr), 0, 1, 0, 0}, - {&__pyx_kp_u_Invalid_shape_in_axis, __pyx_k_Invalid_shape_in_axis, sizeof(__pyx_k_Invalid_shape_in_axis), 0, 1, 0, 0}, - {&__pyx_n_s_KeyError, __pyx_k_KeyError, sizeof(__pyx_k_KeyError), 0, 0, 1, 1}, - {&__pyx_n_s_MAXSIZE_BYTES, __pyx_k_MAXSIZE_BYTES, sizeof(__pyx_k_MAXSIZE_BYTES), 0, 0, 1, 1}, - {&__pyx_n_s_MemoryError, __pyx_k_MemoryError, sizeof(__pyx_k_MemoryError), 0, 0, 1, 1}, - {&__pyx_kp_s_MemoryView_of_r_at_0x_x, __pyx_k_MemoryView_of_r_at_0x_x, sizeof(__pyx_k_MemoryView_of_r_at_0x_x), 0, 0, 1, 0}, - {&__pyx_kp_s_MemoryView_of_r_object, __pyx_k_MemoryView_of_r_object, sizeof(__pyx_k_MemoryView_of_r_object), 0, 0, 1, 0}, - {&__pyx_n_b_O, __pyx_k_O, sizeof(__pyx_k_O), 0, 0, 0, 1}, - {&__pyx_n_s_Object, __pyx_k_Object, sizeof(__pyx_k_Object), 0, 0, 1, 1}, - {&__pyx_n_s_Object___iter, __pyx_k_Object___iter, sizeof(__pyx_k_Object___iter), 0, 0, 1, 1}, - {&__pyx_n_s_Object___reduce_cython, __pyx_k_Object___reduce_cython, sizeof(__pyx_k_Object___reduce_cython), 0, 0, 1, 1}, - {&__pyx_n_s_Object___setstate_cython, __pyx_k_Object___setstate_cython, sizeof(__pyx_k_Object___setstate_cython), 0, 0, 1, 1}, - {&__pyx_n_s_Object_as_dict, __pyx_k_Object_as_dict, sizeof(__pyx_k_Object_as_dict), 0, 0, 1, 1}, - {&__pyx_n_s_Object_at_pointer, __pyx_k_Object_at_pointer, sizeof(__pyx_k_Object_at_pointer), 0, 0, 1, 1}, - {&__pyx_n_s_Object_get, __pyx_k_Object_get, sizeof(__pyx_k_Object_get), 0, 0, 1, 1}, - {&__pyx_n_s_Object_items, __pyx_k_Object_items, sizeof(__pyx_k_Object_items), 0, 0, 1, 1}, - {&__pyx_n_s_Object_values, __pyx_k_Object_values, sizeof(__pyx_k_Object_values), 0, 0, 1, 1}, - {&__pyx_kp_u_Out_of_bounds_on_buffer_access_a, __pyx_k_Out_of_bounds_on_buffer_access_a, sizeof(__pyx_k_Out_of_bounds_on_buffer_access_a), 0, 1, 0, 0}, - {&__pyx_n_s_PADDING, __pyx_k_PADDING, sizeof(__pyx_k_PADDING), 0, 0, 1, 1}, - {&__pyx_n_s_Parser, __pyx_k_Parser, sizeof(__pyx_k_Parser), 0, 0, 1, 1}, - {&__pyx_n_s_Parser___reduce_cython, __pyx_k_Parser___reduce_cython, sizeof(__pyx_k_Parser___reduce_cython), 0, 0, 1, 1}, - {&__pyx_n_s_Parser___setstate_cython, __pyx_k_Parser___setstate_cython, sizeof(__pyx_k_Parser___setstate_cython), 0, 0, 1, 1}, - {&__pyx_n_s_Parser_get_implementations, __pyx_k_Parser_get_implementations, sizeof(__pyx_k_Parser_get_implementations), 0, 0, 1, 1}, - {&__pyx_n_s_Parser_load, __pyx_k_Parser_load, sizeof(__pyx_k_Parser_load), 0, 0, 1, 1}, - {&__pyx_n_s_Parser_parse, __pyx_k_Parser_parse, sizeof(__pyx_k_Parser_parse), 0, 0, 1, 1}, - {&__pyx_n_s_Path, __pyx_k_Path, sizeof(__pyx_k_Path), 0, 0, 1, 1}, - {&__pyx_n_s_PickleError, __pyx_k_PickleError, sizeof(__pyx_k_PickleError), 0, 0, 1, 1}, - {&__pyx_n_s_RuntimeError, __pyx_k_RuntimeError, sizeof(__pyx_k_RuntimeError), 0, 0, 1, 1}, - {&__pyx_n_s_Sequence, __pyx_k_Sequence, sizeof(__pyx_k_Sequence), 0, 0, 1, 1}, - {&__pyx_kp_s_Step_may_not_be_zero_axis_d, __pyx_k_Step_may_not_be_zero_axis_d, sizeof(__pyx_k_Step_may_not_be_zero_axis_d), 0, 0, 1, 0}, - {&__pyx_kp_u_Tried_to_re_use_a_parser_while_s, __pyx_k_Tried_to_re_use_a_parser_while_s, sizeof(__pyx_k_Tried_to_re_use_a_parser_while_s), 0, 1, 0, 0}, - {&__pyx_n_s_TypeError, __pyx_k_TypeError, sizeof(__pyx_k_TypeError), 0, 0, 1, 1}, - {&__pyx_kp_s_Unable_to_convert_item_to_object, __pyx_k_Unable_to_convert_item_to_object, sizeof(__pyx_k_Unable_to_convert_item_to_object), 0, 0, 1, 0}, - {&__pyx_kp_u_Unknown_Implementation, __pyx_k_Unknown_Implementation, sizeof(__pyx_k_Unknown_Implementation), 0, 1, 0, 0}, - {&__pyx_n_s_VERSION, __pyx_k_VERSION, sizeof(__pyx_k_VERSION), 0, 0, 1, 1}, - {&__pyx_n_s_ValueError, __pyx_k_ValueError, sizeof(__pyx_k_ValueError), 0, 0, 1, 1}, - {&__pyx_n_s_View_MemoryView, __pyx_k_View_MemoryView, sizeof(__pyx_k_View_MemoryView), 0, 0, 1, 1}, - {&__pyx_kp_u__2, __pyx_k__2, sizeof(__pyx_k__2), 0, 1, 0, 0}, - {&__pyx_n_s__3, __pyx_k__3, sizeof(__pyx_k__3), 0, 0, 1, 1}, - {&__pyx_n_s__59, __pyx_k__59, sizeof(__pyx_k__59), 0, 0, 1, 1}, - {&__pyx_kp_u__6, __pyx_k__6, sizeof(__pyx_k__6), 0, 1, 0, 0}, - {&__pyx_kp_u__7, __pyx_k__7, sizeof(__pyx_k__7), 0, 1, 0, 0}, - {&__pyx_n_s_abc, __pyx_k_abc, sizeof(__pyx_k_abc), 0, 0, 1, 1}, - {&__pyx_n_s_allocate_buffer, __pyx_k_allocate_buffer, sizeof(__pyx_k_allocate_buffer), 0, 0, 1, 1}, - {&__pyx_kp_u_and, __pyx_k_and, sizeof(__pyx_k_and), 0, 1, 0, 0}, - {&__pyx_n_s_args, __pyx_k_args, sizeof(__pyx_k_args), 0, 0, 1, 1}, - {&__pyx_n_s_as_buffer, __pyx_k_as_buffer, sizeof(__pyx_k_as_buffer), 0, 0, 1, 1}, - {&__pyx_n_s_as_dict, __pyx_k_as_dict, sizeof(__pyx_k_as_dict), 0, 0, 1, 1}, - {&__pyx_n_s_as_list, __pyx_k_as_list, sizeof(__pyx_k_as_list), 0, 0, 1, 1}, - {&__pyx_n_s_asyncio_coroutines, __pyx_k_asyncio_coroutines, sizeof(__pyx_k_asyncio_coroutines), 0, 0, 1, 1}, - {&__pyx_n_s_at_pointer, __pyx_k_at_pointer, sizeof(__pyx_k_at_pointer), 0, 0, 1, 1}, - {&__pyx_n_s_base, __pyx_k_base, sizeof(__pyx_k_base), 0, 0, 1, 1}, - {&__pyx_n_s_bytes_data, __pyx_k_bytes_data, sizeof(__pyx_k_bytes_data), 0, 0, 1, 1}, - {&__pyx_n_s_c, __pyx_k_c, sizeof(__pyx_k_c), 0, 0, 1, 1}, - {&__pyx_n_u_c, __pyx_k_c, sizeof(__pyx_k_c), 0, 1, 0, 1}, - {&__pyx_n_s_class, __pyx_k_class, sizeof(__pyx_k_class), 0, 0, 1, 1}, - {&__pyx_n_s_class_getitem, __pyx_k_class_getitem, sizeof(__pyx_k_class_getitem), 0, 0, 1, 1}, - {&__pyx_n_s_cline_in_traceback, __pyx_k_cline_in_traceback, sizeof(__pyx_k_cline_in_traceback), 0, 0, 1, 1}, - {&__pyx_n_s_close, __pyx_k_close, sizeof(__pyx_k_close), 0, 0, 1, 1}, - {&__pyx_n_s_collections, __pyx_k_collections, sizeof(__pyx_k_collections), 0, 0, 1, 1}, - {&__pyx_kp_s_collections_abc, __pyx_k_collections_abc, sizeof(__pyx_k_collections_abc), 0, 0, 1, 0}, - {&__pyx_kp_s_contiguous_and_direct, __pyx_k_contiguous_and_direct, sizeof(__pyx_k_contiguous_and_direct), 0, 0, 1, 0}, - {&__pyx_kp_s_contiguous_and_indirect, __pyx_k_contiguous_and_indirect, sizeof(__pyx_k_contiguous_and_indirect), 0, 0, 1, 0}, - {&__pyx_n_s_count, __pyx_k_count, sizeof(__pyx_k_count), 0, 0, 1, 1}, - {&__pyx_n_s_csimdjson, __pyx_k_csimdjson, sizeof(__pyx_k_csimdjson), 0, 0, 1, 1}, - {&__pyx_n_u_d, __pyx_k_d, sizeof(__pyx_k_d), 0, 1, 0, 1}, - {&__pyx_n_s_data, __pyx_k_data, sizeof(__pyx_k_data), 0, 0, 1, 1}, - {&__pyx_n_s_default, __pyx_k_default, sizeof(__pyx_k_default), 0, 0, 1, 1}, - {&__pyx_n_s_dict, __pyx_k_dict, sizeof(__pyx_k_dict), 0, 0, 1, 1}, - {&__pyx_kp_u_disable, __pyx_k_disable, sizeof(__pyx_k_disable), 0, 1, 0, 0}, - {&__pyx_n_s_document, __pyx_k_document, sizeof(__pyx_k_document), 0, 0, 1, 1}, - {&__pyx_n_s_dtype_is_object, __pyx_k_dtype_is_object, sizeof(__pyx_k_dtype_is_object), 0, 0, 1, 1}, - {&__pyx_kp_u_enable, __pyx_k_enable, sizeof(__pyx_k_enable), 0, 1, 0, 0}, - {&__pyx_n_s_encode, __pyx_k_encode, sizeof(__pyx_k_encode), 0, 0, 1, 1}, - {&__pyx_n_s_enumerate, __pyx_k_enumerate, sizeof(__pyx_k_enumerate), 0, 0, 1, 1}, - {&__pyx_n_s_error, __pyx_k_error, sizeof(__pyx_k_error), 0, 0, 1, 1}, - {&__pyx_n_s_flags, __pyx_k_flags, sizeof(__pyx_k_flags), 0, 0, 1, 1}, - {&__pyx_n_s_format, __pyx_k_format, sizeof(__pyx_k_format), 0, 0, 1, 1}, - {&__pyx_n_s_fortran, __pyx_k_fortran, sizeof(__pyx_k_fortran), 0, 0, 1, 1}, - {&__pyx_n_u_fortran, __pyx_k_fortran, sizeof(__pyx_k_fortran), 0, 1, 0, 1}, - {&__pyx_kp_u_gc, __pyx_k_gc, sizeof(__pyx_k_gc), 0, 1, 0, 0}, - {&__pyx_n_s_get, __pyx_k_get, sizeof(__pyx_k_get), 0, 0, 1, 1}, - {&__pyx_n_s_get_implementations, __pyx_k_get_implementations, sizeof(__pyx_k_get_implementations), 0, 0, 1, 1}, - {&__pyx_n_s_getstate, __pyx_k_getstate, sizeof(__pyx_k_getstate), 0, 0, 1, 1}, - {&__pyx_kp_u_got, __pyx_k_got, sizeof(__pyx_k_got), 0, 1, 0, 0}, - {&__pyx_kp_u_got_differing_extents_in_dimensi, __pyx_k_got_differing_extents_in_dimensi, sizeof(__pyx_k_got_differing_extents_in_dimensi), 0, 1, 0, 0}, - {&__pyx_n_u_i, __pyx_k_i, sizeof(__pyx_k_i), 0, 1, 0, 1}, - {&__pyx_n_s_id, __pyx_k_id, sizeof(__pyx_k_id), 0, 0, 1, 1}, - {&__pyx_n_s_impl, __pyx_k_impl, sizeof(__pyx_k_impl), 0, 0, 1, 1}, - {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, - {&__pyx_n_s_index, __pyx_k_index, sizeof(__pyx_k_index), 0, 0, 1, 1}, - {&__pyx_n_s_initializing, __pyx_k_initializing, sizeof(__pyx_k_initializing), 0, 0, 1, 1}, - {&__pyx_n_s_is_coroutine, __pyx_k_is_coroutine, sizeof(__pyx_k_is_coroutine), 0, 0, 1, 1}, - {&__pyx_kp_u_isenabled, __pyx_k_isenabled, sizeof(__pyx_k_isenabled), 0, 1, 0, 0}, - {&__pyx_n_s_it, __pyx_k_it, sizeof(__pyx_k_it), 0, 0, 1, 1}, - {&__pyx_n_s_items, __pyx_k_items, sizeof(__pyx_k_items), 0, 0, 1, 1}, - {&__pyx_n_s_itemsize, __pyx_k_itemsize, sizeof(__pyx_k_itemsize), 0, 0, 1, 1}, - {&__pyx_kp_s_itemsize_0_for_cython_array, __pyx_k_itemsize_0_for_cython_array, sizeof(__pyx_k_itemsize_0_for_cython_array), 0, 0, 1, 0}, - {&__pyx_n_s_iter, __pyx_k_iter, sizeof(__pyx_k_iter), 0, 0, 1, 1}, - {&__pyx_n_s_json_pointer, __pyx_k_json_pointer, sizeof(__pyx_k_json_pointer), 0, 0, 1, 1}, - {&__pyx_n_s_key, __pyx_k_key, sizeof(__pyx_k_key), 0, 0, 1, 1}, - {&__pyx_n_s_keys, __pyx_k_keys, sizeof(__pyx_k_keys), 0, 0, 1, 1}, - {&__pyx_n_s_load, __pyx_k_load, sizeof(__pyx_k_load), 0, 0, 1, 1}, - {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, - {&__pyx_n_s_max_capacity, __pyx_k_max_capacity, sizeof(__pyx_k_max_capacity), 0, 0, 1, 1}, - {&__pyx_n_s_memview, __pyx_k_memview, sizeof(__pyx_k_memview), 0, 0, 1, 1}, - {&__pyx_n_s_mode, __pyx_k_mode, sizeof(__pyx_k_mode), 0, 0, 1, 1}, - {&__pyx_n_s_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 1, 1}, - {&__pyx_n_s_name_2, __pyx_k_name_2, sizeof(__pyx_k_name_2), 0, 0, 1, 1}, - {&__pyx_n_s_ndim, __pyx_k_ndim, sizeof(__pyx_k_ndim), 0, 0, 1, 1}, - {&__pyx_n_s_new, __pyx_k_new, sizeof(__pyx_k_new), 0, 0, 1, 1}, - {&__pyx_kp_s_no_default___reduce___due_to_non, __pyx_k_no_default___reduce___due_to_non, sizeof(__pyx_k_no_default___reduce___due_to_non), 0, 0, 1, 0}, - {&__pyx_n_s_obj, __pyx_k_obj, sizeof(__pyx_k_obj), 0, 0, 1, 1}, - {&__pyx_n_s_of_type, __pyx_k_of_type, sizeof(__pyx_k_of_type), 0, 0, 1, 1}, - {&__pyx_kp_u_of_type_must_be_one_of_d_i_u, __pyx_k_of_type_must_be_one_of_d_i_u, sizeof(__pyx_k_of_type_must_be_one_of_d_i_u), 0, 1, 0, 0}, - {&__pyx_n_s_pack, __pyx_k_pack, sizeof(__pyx_k_pack), 0, 0, 1, 1}, - {&__pyx_n_s_parse, __pyx_k_parse, sizeof(__pyx_k_parse), 0, 0, 1, 1}, - {&__pyx_n_s_path, __pyx_k_path, sizeof(__pyx_k_path), 0, 0, 1, 1}, - {&__pyx_n_s_pathlib, __pyx_k_pathlib, sizeof(__pyx_k_pathlib), 0, 0, 1, 1}, - {&__pyx_n_s_pickle, __pyx_k_pickle, sizeof(__pyx_k_pickle), 0, 0, 1, 1}, - {&__pyx_n_s_pyx_PickleError, __pyx_k_pyx_PickleError, sizeof(__pyx_k_pyx_PickleError), 0, 0, 1, 1}, - {&__pyx_n_s_pyx_checksum, __pyx_k_pyx_checksum, sizeof(__pyx_k_pyx_checksum), 0, 0, 1, 1}, - {&__pyx_n_s_pyx_result, __pyx_k_pyx_result, sizeof(__pyx_k_pyx_result), 0, 0, 1, 1}, - {&__pyx_n_s_pyx_state, __pyx_k_pyx_state, sizeof(__pyx_k_pyx_state), 0, 0, 1, 1}, - {&__pyx_n_s_pyx_type, __pyx_k_pyx_type, sizeof(__pyx_k_pyx_type), 0, 0, 1, 1}, - {&__pyx_n_s_pyx_unpickle_Enum, __pyx_k_pyx_unpickle_Enum, sizeof(__pyx_k_pyx_unpickle_Enum), 0, 0, 1, 1}, - {&__pyx_n_s_pyx_vtable, __pyx_k_pyx_vtable, sizeof(__pyx_k_pyx_vtable), 0, 0, 1, 1}, - {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1}, - {&__pyx_n_s_recursive, __pyx_k_recursive, sizeof(__pyx_k_recursive), 0, 0, 1, 1}, - {&__pyx_n_s_reduce, __pyx_k_reduce, sizeof(__pyx_k_reduce), 0, 0, 1, 1}, - {&__pyx_n_s_reduce_cython, __pyx_k_reduce_cython, sizeof(__pyx_k_reduce_cython), 0, 0, 1, 1}, - {&__pyx_n_s_reduce_ex, __pyx_k_reduce_ex, sizeof(__pyx_k_reduce_ex), 0, 0, 1, 1}, - {&__pyx_n_s_register, __pyx_k_register, sizeof(__pyx_k_register), 0, 0, 1, 1}, - {&__pyx_n_s_self, __pyx_k_self, sizeof(__pyx_k_self), 0, 0, 1, 1}, - {&__pyx_kp_s_self_c_element_self_c_parser_can, __pyx_k_self_c_element_self_c_parser_can, sizeof(__pyx_k_self_c_element_self_c_parser_can), 0, 0, 1, 0}, - {&__pyx_n_s_send, __pyx_k_send, sizeof(__pyx_k_send), 0, 0, 1, 1}, - {&__pyx_n_s_setstate, __pyx_k_setstate, sizeof(__pyx_k_setstate), 0, 0, 1, 1}, - {&__pyx_n_s_setstate_cython, __pyx_k_setstate_cython, sizeof(__pyx_k_setstate_cython), 0, 0, 1, 1}, - {&__pyx_n_s_shape, __pyx_k_shape, sizeof(__pyx_k_shape), 0, 0, 1, 1}, - {&__pyx_kp_s_simdjson_csimdjson_pyx, __pyx_k_simdjson_csimdjson_pyx, sizeof(__pyx_k_simdjson_csimdjson_pyx), 0, 0, 1, 0}, - {&__pyx_n_s_size, __pyx_k_size, sizeof(__pyx_k_size), 0, 0, 1, 1}, - {&__pyx_n_s_spec, __pyx_k_spec, sizeof(__pyx_k_spec), 0, 0, 1, 1}, - {&__pyx_n_s_src, __pyx_k_src, sizeof(__pyx_k_src), 0, 0, 1, 1}, - {&__pyx_n_s_start, __pyx_k_start, sizeof(__pyx_k_start), 0, 0, 1, 1}, - {&__pyx_n_s_step, __pyx_k_step, sizeof(__pyx_k_step), 0, 0, 1, 1}, - {&__pyx_n_s_stop, __pyx_k_stop, sizeof(__pyx_k_stop), 0, 0, 1, 1}, - {&__pyx_n_s_str_data, __pyx_k_str_data, sizeof(__pyx_k_str_data), 0, 0, 1, 1}, - {&__pyx_n_s_str_size, __pyx_k_str_size, sizeof(__pyx_k_str_size), 0, 0, 1, 1}, - {&__pyx_kp_s_strided_and_direct, __pyx_k_strided_and_direct, sizeof(__pyx_k_strided_and_direct), 0, 0, 1, 0}, - {&__pyx_kp_s_strided_and_direct_or_indirect, __pyx_k_strided_and_direct_or_indirect, sizeof(__pyx_k_strided_and_direct_or_indirect), 0, 0, 1, 0}, - {&__pyx_kp_s_strided_and_indirect, __pyx_k_strided_and_indirect, sizeof(__pyx_k_strided_and_indirect), 0, 0, 1, 0}, - {&__pyx_kp_s_stringsource, __pyx_k_stringsource, sizeof(__pyx_k_stringsource), 0, 0, 1, 0}, - {&__pyx_n_s_struct, __pyx_k_struct, sizeof(__pyx_k_struct), 0, 0, 1, 1}, - {&__pyx_n_s_supported_by_runtime, __pyx_k_supported_by_runtime, sizeof(__pyx_k_supported_by_runtime), 0, 0, 1, 1}, - {&__pyx_n_s_sys, __pyx_k_sys, sizeof(__pyx_k_sys), 0, 0, 1, 1}, - {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, - {&__pyx_n_s_throw, __pyx_k_throw, sizeof(__pyx_k_throw), 0, 0, 1, 1}, - {&__pyx_n_u_u, __pyx_k_u, sizeof(__pyx_k_u), 0, 1, 0, 1}, - {&__pyx_kp_s_unable_to_allocate_array_data, __pyx_k_unable_to_allocate_array_data, sizeof(__pyx_k_unable_to_allocate_array_data), 0, 0, 1, 0}, - {&__pyx_kp_s_unable_to_allocate_shape_and_str, __pyx_k_unable_to_allocate_shape_and_str, sizeof(__pyx_k_unable_to_allocate_shape_and_str), 0, 0, 1, 0}, - {&__pyx_n_s_unpack, __pyx_k_unpack, sizeof(__pyx_k_unpack), 0, 0, 1, 1}, - {&__pyx_n_s_update, __pyx_k_update, sizeof(__pyx_k_update), 0, 0, 1, 1}, - {&__pyx_kp_u_utf_8, __pyx_k_utf_8, sizeof(__pyx_k_utf_8), 0, 1, 0, 0}, - {&__pyx_n_s_values, __pyx_k_values, sizeof(__pyx_k_values), 0, 0, 1, 1}, - {&__pyx_n_s_version_info, __pyx_k_version_info, sizeof(__pyx_k_version_info), 0, 0, 1, 1}, - {0, 0, 0, 0, 0, 0, 0} - }; - return __Pyx_InitStrings(__pyx_string_tab); -} -/* #### Code section: cached_builtins ### */ -static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) { - __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) __PYX_ERR(0, 99, __pyx_L1_error) - __pyx_builtin_MemoryError = __Pyx_GetBuiltinName(__pyx_n_s_MemoryError); if (!__pyx_builtin_MemoryError) __PYX_ERR(0, 146, __pyx_L1_error) - __pyx_builtin_TypeError = __Pyx_GetBuiltinName(__pyx_n_s_TypeError); if (!__pyx_builtin_TypeError) __PYX_ERR(1, 2, __pyx_L1_error) - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) __PYX_ERR(0, 194, __pyx_L1_error) - __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 194, __pyx_L1_error) - __pyx_builtin_KeyError = __Pyx_GetBuiltinName(__pyx_n_s_KeyError); if (!__pyx_builtin_KeyError) __PYX_ERR(0, 308, __pyx_L1_error) - __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) __PYX_ERR(0, 435, __pyx_L1_error) - __pyx_builtin___import__ = __Pyx_GetBuiltinName(__pyx_n_s_import); if (!__pyx_builtin___import__) __PYX_ERR(1, 100, __pyx_L1_error) - __pyx_builtin_AssertionError = __Pyx_GetBuiltinName(__pyx_n_s_AssertionError); if (!__pyx_builtin_AssertionError) __PYX_ERR(1, 373, __pyx_L1_error) - __pyx_builtin_Ellipsis = __Pyx_GetBuiltinName(__pyx_n_s_Ellipsis); if (!__pyx_builtin_Ellipsis) __PYX_ERR(1, 408, __pyx_L1_error) - __pyx_builtin_id = __Pyx_GetBuiltinName(__pyx_n_s_id); if (!__pyx_builtin_id) __PYX_ERR(1, 618, __pyx_L1_error) - __pyx_builtin_IndexError = __Pyx_GetBuiltinName(__pyx_n_s_IndexError); if (!__pyx_builtin_IndexError) __PYX_ERR(1, 914, __pyx_L1_error) - return 0; - __pyx_L1_error:; - return -1; -} -/* #### Code section: cached_constants ### */ - -static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - - /* "View.MemoryView":582 - * def suboffsets(self): - * if self.view.suboffsets == NULL: - * return (-1,) * self.view.ndim # <<<<<<<<<<<<<< - * - * return tuple([suboffset for suboffset in self.view.suboffsets[:self.view.ndim]]) - */ - __pyx_tuple__4 = PyTuple_New(1); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(1, 582, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__4); - __Pyx_INCREF(__pyx_int_neg_1); - __Pyx_GIVEREF(__pyx_int_neg_1); - if (__Pyx_PyTuple_SET_ITEM(__pyx_tuple__4, 0, __pyx_int_neg_1)) __PYX_ERR(1, 582, __pyx_L1_error); - __Pyx_GIVEREF(__pyx_tuple__4); - - /* "View.MemoryView":679 - * tup = index if isinstance(index, tuple) else (index,) - * - * result = [slice(None)] * ndim # <<<<<<<<<<<<<< - * have_slices = False - * seen_ellipsis = False - */ - __pyx_slice__5 = PySlice_New(Py_None, Py_None, Py_None); if (unlikely(!__pyx_slice__5)) __PYX_ERR(1, 679, __pyx_L1_error) - __Pyx_GOTREF(__pyx_slice__5); - __Pyx_GIVEREF(__pyx_slice__5); - - /* "(tree fragment)":4 - * cdef object __pyx_PickleError - * cdef object __pyx_result - * if __pyx_checksum not in (0x82a3537, 0x6ae9995, 0xb068931): # <<<<<<<<<<<<<< - * from pickle import PickleError as __pyx_PickleError - * raise __pyx_PickleError, "Incompatible checksums (0x%x vs (0x82a3537, 0x6ae9995, 0xb068931) = (name))" % __pyx_checksum - */ - __pyx_tuple__8 = PyTuple_Pack(3, __pyx_int_136983863, __pyx_int_112105877, __pyx_int_184977713); if (unlikely(!__pyx_tuple__8)) __PYX_ERR(1, 4, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__8); - __Pyx_GIVEREF(__pyx_tuple__8); - - /* "csimdjson.pyx":99 - * return None - * else: - * raise ValueError( # pragma: no cover # <<<<<<<<<<<<<< - * 'Encountered an unknown element_type.' - * ) - */ - __pyx_tuple__9 = PyTuple_Pack(1, __pyx_kp_u_Encountered_an_unknown_element_t); if (unlikely(!__pyx_tuple__9)) __PYX_ERR(0, 99, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__9); - __Pyx_GIVEREF(__pyx_tuple__9); - - /* "csimdjson.pyx":143 - * self.buffer = flatten_array[uint64_t](src, &self.size) - * else: - * raise ValueError('of_type must be one of {d,i,u}.') # <<<<<<<<<<<<<< - * - * if not self.buffer: - */ - __pyx_tuple__10 = PyTuple_Pack(1, __pyx_kp_u_of_type_must_be_one_of_d_i_u); if (unlikely(!__pyx_tuple__10)) __PYX_ERR(0, 143, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__10); - __Pyx_GIVEREF(__pyx_tuple__10); - - /* "csimdjson.pyx":435 - * # to recommend against re-use on PyPy. - * if self.c_parser.use_count() > 1: - * raise RuntimeError( # <<<<<<<<<<<<<< - * 'Tried to re-use a parser while simdjson.Object and/or' - * ' simdjson.Array objects still exist referencing the old' - */ - __pyx_tuple__14 = PyTuple_Pack(1, __pyx_kp_u_Tried_to_re_use_a_parser_while_s); if (unlikely(!__pyx_tuple__14)) __PYX_ERR(0, 435, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__14); - __Pyx_GIVEREF(__pyx_tuple__14); - - /* "csimdjson.pyx":483 - * # confusing) IndexError. This isn't a very good error message, - * # but it's identical to the one simdjson would have raised. - * raise ValueError('EMPTY: no JSON found') # <<<<<<<<<<<<<< - * - * return element_to_primitive( - */ - __pyx_tuple__15 = PyTuple_Pack(1, __pyx_kp_u_EMPTY_no_JSON_found); if (unlikely(!__pyx_tuple__15)) __PYX_ERR(0, 483, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__15); - __Pyx_GIVEREF(__pyx_tuple__15); - - /* "csimdjson.pyx":560 - * - * if not impl.supported_by_runtime_system(): - * raise RuntimeError( # <<<<<<<<<<<<<< - * 'Attempted to set a runtime Implementation that is not' - * 'supported on the current host.' - */ - __pyx_tuple__17 = PyTuple_Pack(1, __pyx_kp_u_Attempted_to_set_a_runtime_Imple); if (unlikely(!__pyx_tuple__17)) __PYX_ERR(0, 560, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__17); - __Pyx_GIVEREF(__pyx_tuple__17); - - /* "csimdjson.pyx":568 - * return - * - * raise ValueError('Unknown Implementation') # <<<<<<<<<<<<<< - */ - __pyx_tuple__18 = PyTuple_Pack(1, __pyx_kp_u_Unknown_Implementation); if (unlikely(!__pyx_tuple__18)) __PYX_ERR(0, 568, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__18); - __Pyx_GIVEREF(__pyx_tuple__18); - - /* "View.MemoryView":100 - * cdef object __pyx_collections_abc_Sequence "__pyx_collections_abc_Sequence" - * try: - * if __import__("sys").version_info >= (3, 3): # <<<<<<<<<<<<<< - * __pyx_collections_abc_Sequence = __import__("collections.abc").abc.Sequence - * else: - */ - __pyx_tuple__19 = PyTuple_Pack(1, __pyx_n_s_sys); if (unlikely(!__pyx_tuple__19)) __PYX_ERR(1, 100, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__19); - __Pyx_GIVEREF(__pyx_tuple__19); - __pyx_tuple__20 = PyTuple_Pack(2, __pyx_int_3, __pyx_int_3); if (unlikely(!__pyx_tuple__20)) __PYX_ERR(1, 100, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__20); - __Pyx_GIVEREF(__pyx_tuple__20); - - /* "View.MemoryView":101 - * try: - * if __import__("sys").version_info >= (3, 3): - * __pyx_collections_abc_Sequence = __import__("collections.abc").abc.Sequence # <<<<<<<<<<<<<< - * else: - * __pyx_collections_abc_Sequence = __import__("collections").Sequence - */ - __pyx_tuple__21 = PyTuple_Pack(1, __pyx_kp_s_collections_abc); if (unlikely(!__pyx_tuple__21)) __PYX_ERR(1, 101, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__21); - __Pyx_GIVEREF(__pyx_tuple__21); - - /* "View.MemoryView":103 - * __pyx_collections_abc_Sequence = __import__("collections.abc").abc.Sequence - * else: - * __pyx_collections_abc_Sequence = __import__("collections").Sequence # <<<<<<<<<<<<<< - * except: - * - */ - __pyx_tuple__22 = PyTuple_Pack(1, __pyx_n_s_collections); if (unlikely(!__pyx_tuple__22)) __PYX_ERR(1, 103, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__22); - __Pyx_GIVEREF(__pyx_tuple__22); - - /* "View.MemoryView":309 - * return self.name - * - * cdef generic = Enum("") # <<<<<<<<<<<<<< - * cdef strided = Enum("") # default - * cdef indirect = Enum("") - */ - __pyx_tuple__23 = PyTuple_Pack(1, __pyx_kp_s_strided_and_direct_or_indirect); if (unlikely(!__pyx_tuple__23)) __PYX_ERR(1, 309, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__23); - __Pyx_GIVEREF(__pyx_tuple__23); - - /* "View.MemoryView":310 - * - * cdef generic = Enum("") - * cdef strided = Enum("") # default # <<<<<<<<<<<<<< - * cdef indirect = Enum("") - * - */ - __pyx_tuple__24 = PyTuple_Pack(1, __pyx_kp_s_strided_and_direct); if (unlikely(!__pyx_tuple__24)) __PYX_ERR(1, 310, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__24); - __Pyx_GIVEREF(__pyx_tuple__24); - - /* "View.MemoryView":311 - * cdef generic = Enum("") - * cdef strided = Enum("") # default - * cdef indirect = Enum("") # <<<<<<<<<<<<<< - * - * - */ - __pyx_tuple__25 = PyTuple_Pack(1, __pyx_kp_s_strided_and_indirect); if (unlikely(!__pyx_tuple__25)) __PYX_ERR(1, 311, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__25); - __Pyx_GIVEREF(__pyx_tuple__25); - - /* "View.MemoryView":314 - * - * - * cdef contiguous = Enum("") # <<<<<<<<<<<<<< - * cdef indirect_contiguous = Enum("") - * - */ - __pyx_tuple__26 = PyTuple_Pack(1, __pyx_kp_s_contiguous_and_direct); if (unlikely(!__pyx_tuple__26)) __PYX_ERR(1, 314, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__26); - __Pyx_GIVEREF(__pyx_tuple__26); - - /* "View.MemoryView":315 - * - * cdef contiguous = Enum("") - * cdef indirect_contiguous = Enum("") # <<<<<<<<<<<<<< - * - * - */ - __pyx_tuple__27 = PyTuple_Pack(1, __pyx_kp_s_contiguous_and_indirect); if (unlikely(!__pyx_tuple__27)) __PYX_ERR(1, 315, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__27); - __Pyx_GIVEREF(__pyx_tuple__27); - - /* "(tree fragment)":1 - * def __pyx_unpickle_Enum(__pyx_type, long __pyx_checksum, __pyx_state): # <<<<<<<<<<<<<< - * cdef object __pyx_PickleError - * cdef object __pyx_result - */ - __pyx_tuple__28 = PyTuple_Pack(5, __pyx_n_s_pyx_type, __pyx_n_s_pyx_checksum, __pyx_n_s_pyx_state, __pyx_n_s_pyx_PickleError, __pyx_n_s_pyx_result); if (unlikely(!__pyx_tuple__28)) __PYX_ERR(1, 1, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__28); - __Pyx_GIVEREF(__pyx_tuple__28); - __pyx_codeobj__29 = (PyObject*)__Pyx_PyCode_New(3, 0, 0, 5, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__28, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_pyx_unpickle_Enum, 1, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__29)) __PYX_ERR(1, 1, __pyx_L1_error) - __pyx_tuple__30 = PyTuple_Pack(1, __pyx_n_s_self); if (unlikely(!__pyx_tuple__30)) __PYX_ERR(1, 1, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__30); - __Pyx_GIVEREF(__pyx_tuple__30); - __pyx_codeobj__31 = (PyObject*)__Pyx_PyCode_New(1, 0, 0, 1, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__30, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_reduce_cython, 1, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__31)) __PYX_ERR(1, 1, __pyx_L1_error) - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - __pyx_tuple__32 = PyTuple_Pack(2, __pyx_n_s_self, __pyx_n_s_pyx_state); if (unlikely(!__pyx_tuple__32)) __PYX_ERR(1, 3, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__32); - __Pyx_GIVEREF(__pyx_tuple__32); - __pyx_codeobj__33 = (PyObject*)__Pyx_PyCode_New(2, 0, 0, 2, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__32, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_setstate_cython, 3, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__33)) __PYX_ERR(1, 3, __pyx_L1_error) - - /* "csimdjson.pyx":228 - * preincrement(it) - * - * def at_pointer(self, json_pointer): # <<<<<<<<<<<<<< - * """Get the value at the given JSON pointer.""" - * return element_to_primitive( - */ - __pyx_tuple__34 = PyTuple_Pack(2, __pyx_n_s_self, __pyx_n_s_json_pointer); if (unlikely(!__pyx_tuple__34)) __PYX_ERR(0, 228, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__34); - __Pyx_GIVEREF(__pyx_tuple__34); - __pyx_codeobj__35 = (PyObject*)__Pyx_PyCode_New(2, 0, 0, 2, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__34, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_simdjson_csimdjson_pyx, __pyx_n_s_at_pointer, 228, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__35)) __PYX_ERR(0, 228, __pyx_L1_error) - - /* "csimdjson.pyx":237 - * ) - * - * def as_list(self): # <<<<<<<<<<<<<< - * """ - * Convert this Array to a regular python list, recursively - */ - __pyx_codeobj__36 = (PyObject*)__Pyx_PyCode_New(1, 0, 0, 1, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__30, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_simdjson_csimdjson_pyx, __pyx_n_s_as_list, 237, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__36)) __PYX_ERR(0, 237, __pyx_L1_error) - - /* "csimdjson.pyx":244 - * return array_to_list(self.parser, self.c_element, True) - * - * def as_buffer(self, *, of_type): # <<<<<<<<<<<<<< - * """ - * **Copies** the contents of a **homogeneous** array to an - */ - __pyx_tuple__37 = PyTuple_Pack(2, __pyx_n_s_self, __pyx_n_s_of_type); if (unlikely(!__pyx_tuple__37)) __PYX_ERR(0, 244, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__37); - __Pyx_GIVEREF(__pyx_tuple__37); - __pyx_codeobj__38 = (PyObject*)__Pyx_PyCode_New(1, 0, 1, 2, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__37, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_simdjson_csimdjson_pyx, __pyx_n_s_as_buffer, 244, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__38)) __PYX_ERR(0, 244, __pyx_L1_error) - - /* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - * def __setstate_cython__(self, __pyx_state): - */ - __pyx_codeobj__39 = (PyObject*)__Pyx_PyCode_New(1, 0, 0, 1, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__30, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_reduce_cython, 1, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__39)) __PYX_ERR(1, 1, __pyx_L1_error) - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - */ - __pyx_codeobj__40 = (PyObject*)__Pyx_PyCode_New(2, 0, 0, 2, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__32, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_setstate_cython, 3, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__40)) __PYX_ERR(1, 3, __pyx_L1_error) - - /* "csimdjson.pyx":301 - * ) - * - * def get(self, key, default=None): # <<<<<<<<<<<<<< - * """ - * Return the value of `key`, or `default` if the key does - */ - __pyx_tuple__41 = PyTuple_Pack(3, __pyx_n_s_self, __pyx_n_s_key, __pyx_n_s_default); if (unlikely(!__pyx_tuple__41)) __PYX_ERR(0, 301, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__41); - __Pyx_GIVEREF(__pyx_tuple__41); - __pyx_codeobj__42 = (PyObject*)__Pyx_PyCode_New(3, 0, 0, 3, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__41, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_simdjson_csimdjson_pyx, __pyx_n_s_get, 301, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__42)) __PYX_ERR(0, 301, __pyx_L1_error) - __pyx_tuple__43 = PyTuple_Pack(1, Py_None); if (unlikely(!__pyx_tuple__43)) __PYX_ERR(0, 301, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__43); - __Pyx_GIVEREF(__pyx_tuple__43); - - /* "csimdjson.pyx":338 - * keys = __iter__ - * - * def values(self): # <<<<<<<<<<<<<< - * """ - * Returns an iterator over of all values in this `Object`. - */ - __pyx_tuple__44 = PyTuple_Pack(2, __pyx_n_s_self, __pyx_n_s_it); if (unlikely(!__pyx_tuple__44)) __PYX_ERR(0, 338, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__44); - __Pyx_GIVEREF(__pyx_tuple__44); - __pyx_codeobj__11 = (PyObject*)__Pyx_PyCode_New(1, 0, 0, 2, 0, CO_OPTIMIZED|CO_NEWLOCALS|CO_GENERATOR, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__44, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_simdjson_csimdjson_pyx, __pyx_n_s_values, 338, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__11)) __PYX_ERR(0, 338, __pyx_L1_error) - - /* "csimdjson.pyx":347 - * preincrement(it) - * - * def items(self): # <<<<<<<<<<<<<< - * """ - * Returns an iterator over all the (key, value) pairs in this - */ - __pyx_tuple__45 = PyTuple_Pack(4, __pyx_n_s_self, __pyx_n_s_size, __pyx_n_s_data, __pyx_n_s_it); if (unlikely(!__pyx_tuple__45)) __PYX_ERR(0, 347, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__45); - __Pyx_GIVEREF(__pyx_tuple__45); - __pyx_codeobj__12 = (PyObject*)__Pyx_PyCode_New(1, 0, 0, 4, 0, CO_OPTIMIZED|CO_NEWLOCALS|CO_GENERATOR, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__45, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_simdjson_csimdjson_pyx, __pyx_n_s_items, 347, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__12)) __PYX_ERR(0, 347, __pyx_L1_error) - - /* "csimdjson.pyx":366 - * preincrement(it) - * - * def at_pointer(self, json_pointer): # <<<<<<<<<<<<<< - * """Get the value at the given JSON pointer.""" - * return element_to_primitive( - */ - __pyx_codeobj__46 = (PyObject*)__Pyx_PyCode_New(2, 0, 0, 2, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__34, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_simdjson_csimdjson_pyx, __pyx_n_s_at_pointer, 366, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__46)) __PYX_ERR(0, 366, __pyx_L1_error) - - /* "csimdjson.pyx":375 - * ) - * - * def as_dict(self): # <<<<<<<<<<<<<< - * """ - * Convert this `Object` to a regular python dictionary, - */ - __pyx_codeobj__47 = (PyObject*)__Pyx_PyCode_New(1, 0, 0, 1, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__30, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_simdjson_csimdjson_pyx, __pyx_n_s_as_dict, 375, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__47)) __PYX_ERR(0, 375, __pyx_L1_error) - - /* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - * def __setstate_cython__(self, __pyx_state): - */ - __pyx_codeobj__48 = (PyObject*)__Pyx_PyCode_New(1, 0, 0, 1, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__30, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_reduce_cython, 1, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__48)) __PYX_ERR(1, 1, __pyx_L1_error) - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - */ - __pyx_codeobj__49 = (PyObject*)__Pyx_PyCode_New(2, 0, 0, 2, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__32, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_setstate_cython, 3, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__49)) __PYX_ERR(1, 3, __pyx_L1_error) - - /* "csimdjson.pyx":410 - * self.c_parser.reset() - * - * def parse(self, src not None, bint recursive=False): # <<<<<<<<<<<<<< - * """Parse the given JSON document. - * - */ - __pyx_tuple__50 = PyTuple_Pack(7, __pyx_n_s_self, __pyx_n_s_src, __pyx_n_s_recursive, __pyx_n_s_data, __pyx_n_s_str_data, __pyx_n_s_bytes_data, __pyx_n_s_str_size); if (unlikely(!__pyx_tuple__50)) __PYX_ERR(0, 410, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__50); - __Pyx_GIVEREF(__pyx_tuple__50); - __pyx_codeobj__51 = (PyObject*)__Pyx_PyCode_New(3, 0, 0, 7, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__50, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_simdjson_csimdjson_pyx, __pyx_n_s_parse, 410, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__51)) __PYX_ERR(0, 410, __pyx_L1_error) - __pyx_tuple__52 = PyTuple_Pack(1, Py_False); if (unlikely(!__pyx_tuple__52)) __PYX_ERR(0, 410, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__52); - __Pyx_GIVEREF(__pyx_tuple__52); - - /* "csimdjson.pyx":495 - * ) - * - * def load(self, path, bint recursive=False): # <<<<<<<<<<<<<< - * """Load a JSON document from the file system path `path`. - * - */ - __pyx_tuple__53 = PyTuple_Pack(4, __pyx_n_s_self, __pyx_n_s_path, __pyx_n_s_recursive, __pyx_n_s_document); if (unlikely(!__pyx_tuple__53)) __PYX_ERR(0, 495, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__53); - __Pyx_GIVEREF(__pyx_tuple__53); - __pyx_codeobj__54 = (PyObject*)__Pyx_PyCode_New(3, 0, 0, 4, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__53, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_simdjson_csimdjson_pyx, __pyx_n_s_load, 495, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__54)) __PYX_ERR(0, 495, __pyx_L1_error) - - /* "csimdjson.pyx":521 - * return element_to_primitive(self, document, recursive) - * - * def get_implementations(self, supported_by_runtime=True): # <<<<<<<<<<<<<< - * """ - * A list of available parser implementations in the form of [(name, - */ - __pyx_tuple__55 = PyTuple_Pack(3, __pyx_n_s_self, __pyx_n_s_supported_by_runtime, __pyx_n_s_impl); if (unlikely(!__pyx_tuple__55)) __PYX_ERR(0, 521, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__55); - __Pyx_GIVEREF(__pyx_tuple__55); - __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(2, 0, 0, 3, 0, CO_OPTIMIZED|CO_NEWLOCALS|CO_GENERATOR, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__55, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_simdjson_csimdjson_pyx, __pyx_n_s_get_implementations, 521, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) __PYX_ERR(0, 521, __pyx_L1_error) - __pyx_tuple__56 = PyTuple_Pack(1, Py_True); if (unlikely(!__pyx_tuple__56)) __PYX_ERR(0, 521, __pyx_L1_error) - __Pyx_GOTREF(__pyx_tuple__56); - __Pyx_GIVEREF(__pyx_tuple__56); - - /* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - __pyx_codeobj__57 = (PyObject*)__Pyx_PyCode_New(1, 0, 0, 1, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__30, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_reduce_cython, 1, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__57)) __PYX_ERR(1, 1, __pyx_L1_error) - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - __pyx_codeobj__58 = (PyObject*)__Pyx_PyCode_New(2, 0, 0, 2, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__32, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_stringsource, __pyx_n_s_setstate_cython, 3, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__58)) __PYX_ERR(1, 3, __pyx_L1_error) - __Pyx_RefNannyFinishContext(); - return 0; - __pyx_L1_error:; - __Pyx_RefNannyFinishContext(); - return -1; -} -/* #### Code section: init_constants ### */ - -static CYTHON_SMALL_CODE int __Pyx_InitConstants(void) { - if (__Pyx_CreateStringTabAndInitStrings() < 0) __PYX_ERR(0, 1, __pyx_L1_error); - __pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_int_3 = PyInt_FromLong(3); if (unlikely(!__pyx_int_3)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_int_112105877 = PyInt_FromLong(112105877L); if (unlikely(!__pyx_int_112105877)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_int_136983863 = PyInt_FromLong(136983863L); if (unlikely(!__pyx_int_136983863)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_int_184977713 = PyInt_FromLong(184977713L); if (unlikely(!__pyx_int_184977713)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_int_neg_1 = PyInt_FromLong(-1); if (unlikely(!__pyx_int_neg_1)) __PYX_ERR(0, 1, __pyx_L1_error) - return 0; - __pyx_L1_error:; - return -1; -} -/* #### Code section: init_globals ### */ - -static CYTHON_SMALL_CODE int __Pyx_InitGlobals(void) { - /* AssertionsEnabled.init */ - if (likely(__Pyx_init_assertions_enabled() == 0)); else - -if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 1, __pyx_L1_error) - - return 0; - __pyx_L1_error:; - return -1; -} -/* #### Code section: init_module ### */ - -static CYTHON_SMALL_CODE int __Pyx_modinit_global_init_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_variable_export_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_function_export_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_type_init_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_type_import_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_variable_import_code(void); /*proto*/ -static CYTHON_SMALL_CODE int __Pyx_modinit_function_import_code(void); /*proto*/ - -static int __Pyx_modinit_global_init_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_global_init_code", 0); - /*--- Global init code ---*/ - __pyx_collections_abc_Sequence = Py_None; Py_INCREF(Py_None); - generic = Py_None; Py_INCREF(Py_None); - strided = Py_None; Py_INCREF(Py_None); - indirect = Py_None; Py_INCREF(Py_None); - contiguous = Py_None; Py_INCREF(Py_None); - indirect_contiguous = Py_None; Py_INCREF(Py_None); - __Pyx_RefNannyFinishContext(); - return 0; -} - -static int __Pyx_modinit_variable_export_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_variable_export_code", 0); - /*--- Variable export code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - -static int __Pyx_modinit_function_export_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_function_export_code", 0); - /*--- Function export code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - -static int __Pyx_modinit_type_init_code(void) { - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0); - /*--- Type init code ---*/ - __pyx_vtabptr_9csimdjson_ArrayBuffer = &__pyx_vtable_9csimdjson_ArrayBuffer; - __pyx_vtable_9csimdjson_ArrayBuffer.from_element = (PyObject *(*)(simdjson::dom::array, PyObject *))__pyx_f_9csimdjson_11ArrayBuffer_from_element; - #if CYTHON_USE_TYPE_SPECS - __pyx_ptype_9csimdjson_ArrayBuffer = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_9csimdjson_ArrayBuffer_spec, NULL); if (unlikely(!__pyx_ptype_9csimdjson_ArrayBuffer)) __PYX_ERR(0, 104, __pyx_L1_error) - #if !CYTHON_COMPILING_IN_LIMITED_API - __pyx_ptype_9csimdjson_ArrayBuffer->tp_as_buffer = &__pyx_tp_as_buffer_ArrayBuffer; - #elif defined(Py_bf_getbuffer) && defined(Py_bf_releasebuffer) - /* PY_VERSION_HEX >= 0x03090000 || Py_LIMITED_API >= 0x030B0000 */ - #elif defined(_MSC_VER) - #pragma message ("The buffer protocol is not supported in the Limited C-API < 3.11.") - #else - #warning "The buffer protocol is not supported in the Limited C-API < 3.11." - #endif - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_9csimdjson_ArrayBuffer_spec, __pyx_ptype_9csimdjson_ArrayBuffer) < 0) __PYX_ERR(0, 104, __pyx_L1_error) - #else - __pyx_ptype_9csimdjson_ArrayBuffer = &__pyx_type_9csimdjson_ArrayBuffer; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_ptype_9csimdjson_ArrayBuffer) < 0) __PYX_ERR(0, 104, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_ptype_9csimdjson_ArrayBuffer->tp_print = 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_9csimdjson_ArrayBuffer->tp_dictoffset && __pyx_ptype_9csimdjson_ArrayBuffer->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_ptype_9csimdjson_ArrayBuffer->tp_getattro = __Pyx_PyObject_GenericGetAttr; - } - #endif - if (__Pyx_SetVtable(__pyx_ptype_9csimdjson_ArrayBuffer, __pyx_vtabptr_9csimdjson_ArrayBuffer) < 0) __PYX_ERR(0, 104, __pyx_L1_error) - #if !CYTHON_COMPILING_IN_LIMITED_API - if (__Pyx_MergeVtables(__pyx_ptype_9csimdjson_ArrayBuffer) < 0) __PYX_ERR(0, 104, __pyx_L1_error) - #endif - if (PyObject_SetAttr(__pyx_m, __pyx_n_s_ArrayBuffer, (PyObject *) __pyx_ptype_9csimdjson_ArrayBuffer) < 0) __PYX_ERR(0, 104, __pyx_L1_error) - #if !CYTHON_COMPILING_IN_LIMITED_API - if (__Pyx_setup_reduce((PyObject *) __pyx_ptype_9csimdjson_ArrayBuffer) < 0) __PYX_ERR(0, 104, __pyx_L1_error) - #endif - __pyx_vtabptr_9csimdjson_Array = &__pyx_vtable_9csimdjson_Array; - __pyx_vtable_9csimdjson_Array.from_element = (PyObject *(*)(struct __pyx_obj_9csimdjson_Parser *, simdjson::dom::element))__pyx_f_9csimdjson_5Array_from_element; - #if CYTHON_USE_TYPE_SPECS - __pyx_ptype_9csimdjson_Array = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_9csimdjson_Array_spec, NULL); if (unlikely(!__pyx_ptype_9csimdjson_Array)) __PYX_ERR(0, 157, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_9csimdjson_Array_spec, __pyx_ptype_9csimdjson_Array) < 0) __PYX_ERR(0, 157, __pyx_L1_error) - #else - __pyx_ptype_9csimdjson_Array = &__pyx_type_9csimdjson_Array; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_ptype_9csimdjson_Array) < 0) __PYX_ERR(0, 157, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_ptype_9csimdjson_Array->tp_print = 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_9csimdjson_Array->tp_dictoffset && __pyx_ptype_9csimdjson_Array->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_ptype_9csimdjson_Array->tp_getattro = __Pyx_PyObject_GenericGetAttr; - } - #endif - if (__Pyx_SetVtable(__pyx_ptype_9csimdjson_Array, __pyx_vtabptr_9csimdjson_Array) < 0) __PYX_ERR(0, 157, __pyx_L1_error) - #if !CYTHON_COMPILING_IN_LIMITED_API - if (__Pyx_MergeVtables(__pyx_ptype_9csimdjson_Array) < 0) __PYX_ERR(0, 157, __pyx_L1_error) - #endif - if (PyObject_SetAttr(__pyx_m, __pyx_n_s_Array, (PyObject *) __pyx_ptype_9csimdjson_Array) < 0) __PYX_ERR(0, 157, __pyx_L1_error) - #if !CYTHON_COMPILING_IN_LIMITED_API - if (__Pyx_setup_reduce((PyObject *) __pyx_ptype_9csimdjson_Array) < 0) __PYX_ERR(0, 157, __pyx_L1_error) - #endif - __pyx_vtabptr_9csimdjson_Object = &__pyx_vtable_9csimdjson_Object; - __pyx_vtable_9csimdjson_Object.from_element = (PyObject *(*)(struct __pyx_obj_9csimdjson_Parser *, simdjson::dom::element))__pyx_f_9csimdjson_6Object_from_element; - #if CYTHON_USE_TYPE_SPECS - __pyx_ptype_9csimdjson_Object = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_9csimdjson_Object_spec, NULL); if (unlikely(!__pyx_ptype_9csimdjson_Object)) __PYX_ERR(0, 275, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_9csimdjson_Object_spec, __pyx_ptype_9csimdjson_Object) < 0) __PYX_ERR(0, 275, __pyx_L1_error) - #else - __pyx_ptype_9csimdjson_Object = &__pyx_type_9csimdjson_Object; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_ptype_9csimdjson_Object) < 0) __PYX_ERR(0, 275, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_ptype_9csimdjson_Object->tp_print = 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_9csimdjson_Object->tp_dictoffset && __pyx_ptype_9csimdjson_Object->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_ptype_9csimdjson_Object->tp_getattro = __Pyx_PyObject_GenericGetAttr; - } - #endif - #if CYTHON_UPDATE_DESCRIPTOR_DOC - { - PyObject *wrapper = PyObject_GetAttrString((PyObject *)__pyx_ptype_9csimdjson_Object, "__iter__"); if (unlikely(!wrapper)) __PYX_ERR(0, 275, __pyx_L1_error) - if (__Pyx_IS_TYPE(wrapper, &PyWrapperDescr_Type)) { - __pyx_wrapperbase_9csimdjson_6Object_8__iter__ = *((PyWrapperDescrObject *)wrapper)->d_base; - __pyx_wrapperbase_9csimdjson_6Object_8__iter__.doc = __pyx_doc_9csimdjson_6Object_8__iter__; - ((PyWrapperDescrObject *)wrapper)->d_base = &__pyx_wrapperbase_9csimdjson_6Object_8__iter__; - } - } - #endif - if (__Pyx_SetVtable(__pyx_ptype_9csimdjson_Object, __pyx_vtabptr_9csimdjson_Object) < 0) __PYX_ERR(0, 275, __pyx_L1_error) - #if !CYTHON_COMPILING_IN_LIMITED_API - if (__Pyx_MergeVtables(__pyx_ptype_9csimdjson_Object) < 0) __PYX_ERR(0, 275, __pyx_L1_error) - #endif - if (PyObject_SetAttr(__pyx_m, __pyx_n_s_Object, (PyObject *) __pyx_ptype_9csimdjson_Object) < 0) __PYX_ERR(0, 275, __pyx_L1_error) - #if !CYTHON_COMPILING_IN_LIMITED_API - if (__Pyx_setup_reduce((PyObject *) __pyx_ptype_9csimdjson_Object) < 0) __PYX_ERR(0, 275, __pyx_L1_error) - #endif - #if CYTHON_USE_TYPE_SPECS - __pyx_ptype_9csimdjson_Parser = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_9csimdjson_Parser_spec, NULL); if (unlikely(!__pyx_ptype_9csimdjson_Parser)) __PYX_ERR(0, 392, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_9csimdjson_Parser_spec, __pyx_ptype_9csimdjson_Parser) < 0) __PYX_ERR(0, 392, __pyx_L1_error) - #else - __pyx_ptype_9csimdjson_Parser = &__pyx_type_9csimdjson_Parser; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_ptype_9csimdjson_Parser) < 0) __PYX_ERR(0, 392, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_ptype_9csimdjson_Parser->tp_print = 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_9csimdjson_Parser->tp_dictoffset && __pyx_ptype_9csimdjson_Parser->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_ptype_9csimdjson_Parser->tp_getattro = __Pyx_PyObject_GenericGetAttr; - } - #endif - if (PyObject_SetAttr(__pyx_m, __pyx_n_s_Parser, (PyObject *) __pyx_ptype_9csimdjson_Parser) < 0) __PYX_ERR(0, 392, __pyx_L1_error) - #if !CYTHON_COMPILING_IN_LIMITED_API - if (__Pyx_setup_reduce((PyObject *) __pyx_ptype_9csimdjson_Parser) < 0) __PYX_ERR(0, 392, __pyx_L1_error) - #endif - #if CYTHON_USE_TYPE_SPECS - __pyx_ptype_9csimdjson___pyx_scope_struct____iter__ = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_9csimdjson___pyx_scope_struct____iter___spec, NULL); if (unlikely(!__pyx_ptype_9csimdjson___pyx_scope_struct____iter__)) __PYX_ERR(0, 218, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_9csimdjson___pyx_scope_struct____iter___spec, __pyx_ptype_9csimdjson___pyx_scope_struct____iter__) < 0) __PYX_ERR(0, 218, __pyx_L1_error) - #else - __pyx_ptype_9csimdjson___pyx_scope_struct____iter__ = &__pyx_type_9csimdjson___pyx_scope_struct____iter__; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_ptype_9csimdjson___pyx_scope_struct____iter__) < 0) __PYX_ERR(0, 218, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_ptype_9csimdjson___pyx_scope_struct____iter__->tp_print = 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_9csimdjson___pyx_scope_struct____iter__->tp_dictoffset && __pyx_ptype_9csimdjson___pyx_scope_struct____iter__->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_ptype_9csimdjson___pyx_scope_struct____iter__->tp_getattro = __Pyx_PyObject_GenericGetAttrNoDict; - } - #endif - #if CYTHON_USE_TYPE_SPECS - __pyx_ptype_9csimdjson___pyx_scope_struct_1___iter__ = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_9csimdjson___pyx_scope_struct_1___iter___spec, NULL); if (unlikely(!__pyx_ptype_9csimdjson___pyx_scope_struct_1___iter__)) __PYX_ERR(0, 321, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_9csimdjson___pyx_scope_struct_1___iter___spec, __pyx_ptype_9csimdjson___pyx_scope_struct_1___iter__) < 0) __PYX_ERR(0, 321, __pyx_L1_error) - #else - __pyx_ptype_9csimdjson___pyx_scope_struct_1___iter__ = &__pyx_type_9csimdjson___pyx_scope_struct_1___iter__; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_ptype_9csimdjson___pyx_scope_struct_1___iter__) < 0) __PYX_ERR(0, 321, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_ptype_9csimdjson___pyx_scope_struct_1___iter__->tp_print = 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_9csimdjson___pyx_scope_struct_1___iter__->tp_dictoffset && __pyx_ptype_9csimdjson___pyx_scope_struct_1___iter__->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_ptype_9csimdjson___pyx_scope_struct_1___iter__->tp_getattro = __Pyx_PyObject_GenericGetAttrNoDict; - } - #endif - #if CYTHON_USE_TYPE_SPECS - __pyx_ptype_9csimdjson___pyx_scope_struct_2_values = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_9csimdjson___pyx_scope_struct_2_values_spec, NULL); if (unlikely(!__pyx_ptype_9csimdjson___pyx_scope_struct_2_values)) __PYX_ERR(0, 338, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_9csimdjson___pyx_scope_struct_2_values_spec, __pyx_ptype_9csimdjson___pyx_scope_struct_2_values) < 0) __PYX_ERR(0, 338, __pyx_L1_error) - #else - __pyx_ptype_9csimdjson___pyx_scope_struct_2_values = &__pyx_type_9csimdjson___pyx_scope_struct_2_values; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_ptype_9csimdjson___pyx_scope_struct_2_values) < 0) __PYX_ERR(0, 338, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_ptype_9csimdjson___pyx_scope_struct_2_values->tp_print = 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_9csimdjson___pyx_scope_struct_2_values->tp_dictoffset && __pyx_ptype_9csimdjson___pyx_scope_struct_2_values->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_ptype_9csimdjson___pyx_scope_struct_2_values->tp_getattro = __Pyx_PyObject_GenericGetAttrNoDict; - } - #endif - #if CYTHON_USE_TYPE_SPECS - __pyx_ptype_9csimdjson___pyx_scope_struct_3_items = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_9csimdjson___pyx_scope_struct_3_items_spec, NULL); if (unlikely(!__pyx_ptype_9csimdjson___pyx_scope_struct_3_items)) __PYX_ERR(0, 347, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_9csimdjson___pyx_scope_struct_3_items_spec, __pyx_ptype_9csimdjson___pyx_scope_struct_3_items) < 0) __PYX_ERR(0, 347, __pyx_L1_error) - #else - __pyx_ptype_9csimdjson___pyx_scope_struct_3_items = &__pyx_type_9csimdjson___pyx_scope_struct_3_items; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_ptype_9csimdjson___pyx_scope_struct_3_items) < 0) __PYX_ERR(0, 347, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_ptype_9csimdjson___pyx_scope_struct_3_items->tp_print = 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_9csimdjson___pyx_scope_struct_3_items->tp_dictoffset && __pyx_ptype_9csimdjson___pyx_scope_struct_3_items->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_ptype_9csimdjson___pyx_scope_struct_3_items->tp_getattro = __Pyx_PyObject_GenericGetAttrNoDict; - } - #endif - #if CYTHON_USE_TYPE_SPECS - __pyx_ptype_9csimdjson___pyx_scope_struct_4_get_implementations = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type_9csimdjson___pyx_scope_struct_4_get_implementations_spec, NULL); if (unlikely(!__pyx_ptype_9csimdjson___pyx_scope_struct_4_get_implementations)) __PYX_ERR(0, 521, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type_9csimdjson___pyx_scope_struct_4_get_implementations_spec, __pyx_ptype_9csimdjson___pyx_scope_struct_4_get_implementations) < 0) __PYX_ERR(0, 521, __pyx_L1_error) - #else - __pyx_ptype_9csimdjson___pyx_scope_struct_4_get_implementations = &__pyx_type_9csimdjson___pyx_scope_struct_4_get_implementations; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_ptype_9csimdjson___pyx_scope_struct_4_get_implementations) < 0) __PYX_ERR(0, 521, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_ptype_9csimdjson___pyx_scope_struct_4_get_implementations->tp_print = 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_ptype_9csimdjson___pyx_scope_struct_4_get_implementations->tp_dictoffset && __pyx_ptype_9csimdjson___pyx_scope_struct_4_get_implementations->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_ptype_9csimdjson___pyx_scope_struct_4_get_implementations->tp_getattro = __Pyx_PyObject_GenericGetAttrNoDict; - } - #endif - __pyx_vtabptr_array = &__pyx_vtable_array; - __pyx_vtable_array.get_memview = (PyObject *(*)(struct __pyx_array_obj *))__pyx_array_get_memview; - #if CYTHON_USE_TYPE_SPECS - __pyx_array_type = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type___pyx_array_spec, NULL); if (unlikely(!__pyx_array_type)) __PYX_ERR(1, 114, __pyx_L1_error) - #if !CYTHON_COMPILING_IN_LIMITED_API - __pyx_array_type->tp_as_buffer = &__pyx_tp_as_buffer_array; - if (!__pyx_array_type->tp_as_buffer->bf_releasebuffer && __pyx_array_type->tp_base->tp_as_buffer && __pyx_array_type->tp_base->tp_as_buffer->bf_releasebuffer) { - __pyx_array_type->tp_as_buffer->bf_releasebuffer = __pyx_array_type->tp_base->tp_as_buffer->bf_releasebuffer; - } - #elif defined(Py_bf_getbuffer) && defined(Py_bf_releasebuffer) - /* PY_VERSION_HEX >= 0x03090000 || Py_LIMITED_API >= 0x030B0000 */ - #elif defined(_MSC_VER) - #pragma message ("The buffer protocol is not supported in the Limited C-API < 3.11.") - #else - #warning "The buffer protocol is not supported in the Limited C-API < 3.11." - #endif - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type___pyx_array_spec, __pyx_array_type) < 0) __PYX_ERR(1, 114, __pyx_L1_error) - #else - __pyx_array_type = &__pyx_type___pyx_array; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_array_type) < 0) __PYX_ERR(1, 114, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_array_type->tp_print = 0; - #endif - if (__Pyx_SetVtable(__pyx_array_type, __pyx_vtabptr_array) < 0) __PYX_ERR(1, 114, __pyx_L1_error) - #if !CYTHON_COMPILING_IN_LIMITED_API - if (__Pyx_MergeVtables(__pyx_array_type) < 0) __PYX_ERR(1, 114, __pyx_L1_error) - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if (__Pyx_setup_reduce((PyObject *) __pyx_array_type) < 0) __PYX_ERR(1, 114, __pyx_L1_error) - #endif - #if CYTHON_USE_TYPE_SPECS - __pyx_MemviewEnum_type = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type___pyx_MemviewEnum_spec, NULL); if (unlikely(!__pyx_MemviewEnum_type)) __PYX_ERR(1, 302, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type___pyx_MemviewEnum_spec, __pyx_MemviewEnum_type) < 0) __PYX_ERR(1, 302, __pyx_L1_error) - #else - __pyx_MemviewEnum_type = &__pyx_type___pyx_MemviewEnum; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_MemviewEnum_type) < 0) __PYX_ERR(1, 302, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_MemviewEnum_type->tp_print = 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_MemviewEnum_type->tp_dictoffset && __pyx_MemviewEnum_type->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_MemviewEnum_type->tp_getattro = __Pyx_PyObject_GenericGetAttr; - } - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if (__Pyx_setup_reduce((PyObject *) __pyx_MemviewEnum_type) < 0) __PYX_ERR(1, 302, __pyx_L1_error) - #endif - __pyx_vtabptr_memoryview = &__pyx_vtable_memoryview; - __pyx_vtable_memoryview.get_item_pointer = (char *(*)(struct __pyx_memoryview_obj *, PyObject *))__pyx_memoryview_get_item_pointer; - __pyx_vtable_memoryview.is_slice = (PyObject *(*)(struct __pyx_memoryview_obj *, PyObject *))__pyx_memoryview_is_slice; - __pyx_vtable_memoryview.setitem_slice_assignment = (PyObject *(*)(struct __pyx_memoryview_obj *, PyObject *, PyObject *))__pyx_memoryview_setitem_slice_assignment; - __pyx_vtable_memoryview.setitem_slice_assign_scalar = (PyObject *(*)(struct __pyx_memoryview_obj *, struct __pyx_memoryview_obj *, PyObject *))__pyx_memoryview_setitem_slice_assign_scalar; - __pyx_vtable_memoryview.setitem_indexed = (PyObject *(*)(struct __pyx_memoryview_obj *, PyObject *, PyObject *))__pyx_memoryview_setitem_indexed; - __pyx_vtable_memoryview.convert_item_to_object = (PyObject *(*)(struct __pyx_memoryview_obj *, char *))__pyx_memoryview_convert_item_to_object; - __pyx_vtable_memoryview.assign_item_from_object = (PyObject *(*)(struct __pyx_memoryview_obj *, char *, PyObject *))__pyx_memoryview_assign_item_from_object; - __pyx_vtable_memoryview._get_base = (PyObject *(*)(struct __pyx_memoryview_obj *))__pyx_memoryview__get_base; - #if CYTHON_USE_TYPE_SPECS - __pyx_memoryview_type = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type___pyx_memoryview_spec, NULL); if (unlikely(!__pyx_memoryview_type)) __PYX_ERR(1, 337, __pyx_L1_error) - #if !CYTHON_COMPILING_IN_LIMITED_API - __pyx_memoryview_type->tp_as_buffer = &__pyx_tp_as_buffer_memoryview; - if (!__pyx_memoryview_type->tp_as_buffer->bf_releasebuffer && __pyx_memoryview_type->tp_base->tp_as_buffer && __pyx_memoryview_type->tp_base->tp_as_buffer->bf_releasebuffer) { - __pyx_memoryview_type->tp_as_buffer->bf_releasebuffer = __pyx_memoryview_type->tp_base->tp_as_buffer->bf_releasebuffer; - } - #elif defined(Py_bf_getbuffer) && defined(Py_bf_releasebuffer) - /* PY_VERSION_HEX >= 0x03090000 || Py_LIMITED_API >= 0x030B0000 */ - #elif defined(_MSC_VER) - #pragma message ("The buffer protocol is not supported in the Limited C-API < 3.11.") - #else - #warning "The buffer protocol is not supported in the Limited C-API < 3.11." - #endif - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type___pyx_memoryview_spec, __pyx_memoryview_type) < 0) __PYX_ERR(1, 337, __pyx_L1_error) - #else - __pyx_memoryview_type = &__pyx_type___pyx_memoryview; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_memoryview_type) < 0) __PYX_ERR(1, 337, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_memoryview_type->tp_print = 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_memoryview_type->tp_dictoffset && __pyx_memoryview_type->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_memoryview_type->tp_getattro = __Pyx_PyObject_GenericGetAttr; - } - #endif - if (__Pyx_SetVtable(__pyx_memoryview_type, __pyx_vtabptr_memoryview) < 0) __PYX_ERR(1, 337, __pyx_L1_error) - #if !CYTHON_COMPILING_IN_LIMITED_API - if (__Pyx_MergeVtables(__pyx_memoryview_type) < 0) __PYX_ERR(1, 337, __pyx_L1_error) - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if (__Pyx_setup_reduce((PyObject *) __pyx_memoryview_type) < 0) __PYX_ERR(1, 337, __pyx_L1_error) - #endif - __pyx_vtabptr__memoryviewslice = &__pyx_vtable__memoryviewslice; - __pyx_vtable__memoryviewslice.__pyx_base = *__pyx_vtabptr_memoryview; - __pyx_vtable__memoryviewslice.__pyx_base.convert_item_to_object = (PyObject *(*)(struct __pyx_memoryview_obj *, char *))__pyx_memoryviewslice_convert_item_to_object; - __pyx_vtable__memoryviewslice.__pyx_base.assign_item_from_object = (PyObject *(*)(struct __pyx_memoryview_obj *, char *, PyObject *))__pyx_memoryviewslice_assign_item_from_object; - __pyx_vtable__memoryviewslice.__pyx_base._get_base = (PyObject *(*)(struct __pyx_memoryview_obj *))__pyx_memoryviewslice__get_base; - #if CYTHON_USE_TYPE_SPECS - __pyx_t_1 = PyTuple_Pack(1, (PyObject *)__pyx_memoryview_type); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 952, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_memoryviewslice_type = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type___pyx_memoryviewslice_spec, __pyx_t_1); - __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - if (unlikely(!__pyx_memoryviewslice_type)) __PYX_ERR(1, 952, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type___pyx_memoryviewslice_spec, __pyx_memoryviewslice_type) < 0) __PYX_ERR(1, 952, __pyx_L1_error) - #else - __pyx_memoryviewslice_type = &__pyx_type___pyx_memoryviewslice; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - __pyx_memoryviewslice_type->tp_base = __pyx_memoryview_type; - #endif - #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_memoryviewslice_type) < 0) __PYX_ERR(1, 952, __pyx_L1_error) - #endif - #if PY_MAJOR_VERSION < 3 - __pyx_memoryviewslice_type->tp_print = 0; - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_memoryviewslice_type->tp_dictoffset && __pyx_memoryviewslice_type->tp_getattro == PyObject_GenericGetAttr)) { - __pyx_memoryviewslice_type->tp_getattro = __Pyx_PyObject_GenericGetAttr; - } - #endif - if (__Pyx_SetVtable(__pyx_memoryviewslice_type, __pyx_vtabptr__memoryviewslice) < 0) __PYX_ERR(1, 952, __pyx_L1_error) - #if !CYTHON_COMPILING_IN_LIMITED_API - if (__Pyx_MergeVtables(__pyx_memoryviewslice_type) < 0) __PYX_ERR(1, 952, __pyx_L1_error) - #endif - #if !CYTHON_COMPILING_IN_LIMITED_API - if (__Pyx_setup_reduce((PyObject *) __pyx_memoryviewslice_type) < 0) __PYX_ERR(1, 952, __pyx_L1_error) - #endif - __Pyx_RefNannyFinishContext(); - return 0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_RefNannyFinishContext(); - return -1; -} - -static int __Pyx_modinit_type_import_code(void) { - __Pyx_RefNannyDeclarations - PyObject *__pyx_t_1 = NULL; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("__Pyx_modinit_type_import_code", 0); - /*--- Type import code ---*/ - __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 9, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_ptype_7cpython_4type_type = __Pyx_ImportType_3_0_2(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type", - #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 - sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_2(PyTypeObject), - #elif CYTHON_COMPILING_IN_LIMITED_API - sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_2(PyTypeObject), - #else - sizeof(PyHeapTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_2(PyHeapTypeObject), - #endif - __Pyx_ImportType_CheckSize_Warn_3_0_2); if (!__pyx_ptype_7cpython_4type_type) __PYX_ERR(2, 9, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_RefNannyFinishContext(); - return 0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_1); - __Pyx_RefNannyFinishContext(); - return -1; -} - -static int __Pyx_modinit_variable_import_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_variable_import_code", 0); - /*--- Variable import code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - -static int __Pyx_modinit_function_import_code(void) { - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("__Pyx_modinit_function_import_code", 0); - /*--- Function import code ---*/ - __Pyx_RefNannyFinishContext(); - return 0; -} - - -#if PY_MAJOR_VERSION >= 3 -#if CYTHON_PEP489_MULTI_PHASE_INIT -static PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def); /*proto*/ -static int __pyx_pymod_exec_csimdjson(PyObject* module); /*proto*/ -static PyModuleDef_Slot __pyx_moduledef_slots[] = { - {Py_mod_create, (void*)__pyx_pymod_create}, - {Py_mod_exec, (void*)__pyx_pymod_exec_csimdjson}, - {0, NULL} -}; -#endif - -#ifdef __cplusplus -namespace { - struct PyModuleDef __pyx_moduledef = - #else - static struct PyModuleDef __pyx_moduledef = - #endif - { - PyModuleDef_HEAD_INIT, - "csimdjson", - 0, /* m_doc */ - #if CYTHON_PEP489_MULTI_PHASE_INIT - 0, /* m_size */ - #elif CYTHON_USE_MODULE_STATE - sizeof(__pyx_mstate), /* m_size */ - #else - -1, /* m_size */ - #endif - __pyx_methods /* m_methods */, - #if CYTHON_PEP489_MULTI_PHASE_INIT - __pyx_moduledef_slots, /* m_slots */ - #else - NULL, /* m_reload */ - #endif - #if CYTHON_USE_MODULE_STATE - __pyx_m_traverse, /* m_traverse */ - __pyx_m_clear, /* m_clear */ - NULL /* m_free */ - #else - NULL, /* m_traverse */ - NULL, /* m_clear */ - NULL /* m_free */ - #endif - }; - #ifdef __cplusplus -} /* anonymous namespace */ -#endif -#endif - -#ifndef CYTHON_NO_PYINIT_EXPORT -#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC -#elif PY_MAJOR_VERSION < 3 -#ifdef __cplusplus -#define __Pyx_PyMODINIT_FUNC extern "C" void -#else -#define __Pyx_PyMODINIT_FUNC void -#endif -#else -#ifdef __cplusplus -#define __Pyx_PyMODINIT_FUNC extern "C" PyObject * -#else -#define __Pyx_PyMODINIT_FUNC PyObject * -#endif -#endif - - -#if PY_MAJOR_VERSION < 3 -__Pyx_PyMODINIT_FUNC initcsimdjson(void) CYTHON_SMALL_CODE; /*proto*/ -__Pyx_PyMODINIT_FUNC initcsimdjson(void) -#else -__Pyx_PyMODINIT_FUNC PyInit_csimdjson(void) CYTHON_SMALL_CODE; /*proto*/ -__Pyx_PyMODINIT_FUNC PyInit_csimdjson(void) -#if CYTHON_PEP489_MULTI_PHASE_INIT -{ - return PyModuleDef_Init(&__pyx_moduledef); -} -static CYTHON_SMALL_CODE int __Pyx_check_single_interpreter(void) { - #if PY_VERSION_HEX >= 0x030700A1 - static PY_INT64_T main_interpreter_id = -1; - PY_INT64_T current_id = PyInterpreterState_GetID(PyThreadState_Get()->interp); - if (main_interpreter_id == -1) { - main_interpreter_id = current_id; - return (unlikely(current_id == -1)) ? -1 : 0; - } else if (unlikely(main_interpreter_id != current_id)) - #else - static PyInterpreterState *main_interpreter = NULL; - PyInterpreterState *current_interpreter = PyThreadState_Get()->interp; - if (!main_interpreter) { - main_interpreter = current_interpreter; - } else if (unlikely(main_interpreter != current_interpreter)) - #endif - { - PyErr_SetString( - PyExc_ImportError, - "Interpreter change detected - this module can only be loaded into one interpreter per process."); - return -1; - } - return 0; -} -#if CYTHON_COMPILING_IN_LIMITED_API -static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *module, const char* from_name, const char* to_name, int allow_none) -#else -static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *moddict, const char* from_name, const char* to_name, int allow_none) -#endif -{ - PyObject *value = PyObject_GetAttrString(spec, from_name); - int result = 0; - if (likely(value)) { - if (allow_none || value != Py_None) { -#if CYTHON_COMPILING_IN_LIMITED_API - result = PyModule_AddObject(module, to_name, value); -#else - result = PyDict_SetItemString(moddict, to_name, value); -#endif - } - Py_DECREF(value); - } else if (PyErr_ExceptionMatches(PyExc_AttributeError)) { - PyErr_Clear(); - } else { - result = -1; - } - return result; -} -static CYTHON_SMALL_CODE PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def) { - PyObject *module = NULL, *moddict, *modname; - CYTHON_UNUSED_VAR(def); - if (__Pyx_check_single_interpreter()) - return NULL; - if (__pyx_m) - return __Pyx_NewRef(__pyx_m); - modname = PyObject_GetAttrString(spec, "name"); - if (unlikely(!modname)) goto bad; - module = PyModule_NewObject(modname); - Py_DECREF(modname); - if (unlikely(!module)) goto bad; -#if CYTHON_COMPILING_IN_LIMITED_API - moddict = module; -#else - moddict = PyModule_GetDict(module); - if (unlikely(!moddict)) goto bad; -#endif - if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "loader", "__loader__", 1) < 0)) goto bad; - if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "origin", "__file__", 1) < 0)) goto bad; - if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "parent", "__package__", 1) < 0)) goto bad; - if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "submodule_search_locations", "__path__", 0) < 0)) goto bad; - return module; -bad: - Py_XDECREF(module); - return NULL; -} - - -static CYTHON_SMALL_CODE int __pyx_pymod_exec_csimdjson(PyObject *__pyx_pyinit_module) -#endif -#endif -{ - int stringtab_initialized = 0; - #if CYTHON_USE_MODULE_STATE - int pystate_addmodule_run = 0; - #endif - PyObject *__pyx_t_1 = NULL; - PyObject *__pyx_t_2 = NULL; - PyObject *__pyx_t_3 = NULL; - PyObject *__pyx_t_4 = NULL; - PyObject *__pyx_t_5 = NULL; - int __pyx_t_6; - PyObject *__pyx_t_7 = NULL; - static PyThread_type_lock __pyx_t_8[8]; - Py_ssize_t __pyx_t_9; - Py_UCS4 __pyx_t_10; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - __Pyx_RefNannyDeclarations - #if CYTHON_PEP489_MULTI_PHASE_INIT - if (__pyx_m) { - if (__pyx_m == __pyx_pyinit_module) return 0; - PyErr_SetString(PyExc_RuntimeError, "Module 'csimdjson' has already been imported. Re-initialisation is not supported."); - return -1; - } - #elif PY_MAJOR_VERSION >= 3 - if (__pyx_m) return __Pyx_NewRef(__pyx_m); - #endif - /*--- Module creation code ---*/ - #if CYTHON_PEP489_MULTI_PHASE_INIT - __pyx_m = __pyx_pyinit_module; - Py_INCREF(__pyx_m); - #else - #if PY_MAJOR_VERSION < 3 - __pyx_m = Py_InitModule4("csimdjson", __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); - if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) - #elif CYTHON_USE_MODULE_STATE - __pyx_t_1 = PyModule_Create(&__pyx_moduledef); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) - { - int add_module_result = PyState_AddModule(__pyx_t_1, &__pyx_moduledef); - __pyx_t_1 = 0; /* transfer ownership from __pyx_t_1 to csimdjson pseudovariable */ - if (unlikely((add_module_result < 0))) __PYX_ERR(0, 1, __pyx_L1_error) - pystate_addmodule_run = 1; - } - #else - __pyx_m = PyModule_Create(&__pyx_moduledef); - if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #endif - CYTHON_UNUSED_VAR(__pyx_t_1); - __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error) - Py_INCREF(__pyx_d); - __pyx_b = PyImport_AddModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error) - Py_INCREF(__pyx_b); - __pyx_cython_runtime = PyImport_AddModule((char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error) - Py_INCREF(__pyx_cython_runtime); - if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #if CYTHON_REFNANNY -__Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); -if (!__Pyx_RefNanny) { - PyErr_Clear(); - __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); - if (!__Pyx_RefNanny) - Py_FatalError("failed to import 'refnanny' module"); -} -#endif - __Pyx_RefNannySetupContext("__Pyx_PyMODINIT_FUNC PyInit_csimdjson(void)", 0); - if (__Pyx_check_binary_version() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #ifdef __Pxy_PyFrame_Initialize_Offsets - __Pxy_PyFrame_Initialize_Offsets(); - #endif - __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error) - __pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_unicode)) __PYX_ERR(0, 1, __pyx_L1_error) - #ifdef __Pyx_CyFunction_USED - if (__pyx_CyFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_FusedFunction_USED - if (__pyx_FusedFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_Coroutine_USED - if (__pyx_Coroutine_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_Generator_USED - if (__pyx_Generator_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_AsyncGen_USED - if (__pyx_AsyncGen_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - #ifdef __Pyx_StopAsyncIteration_USED - if (__pyx_StopAsyncIteration_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - /*--- Library function declarations ---*/ - /*--- Threads initialization code ---*/ - #if defined(WITH_THREAD) && PY_VERSION_HEX < 0x030700F0 && defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS - PyEval_InitThreads(); - #endif - /*--- Initialize various global constants etc. ---*/ - if (__Pyx_InitConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - stringtab_initialized = 1; - if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) - if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - if (__pyx_module_is_main_csimdjson) { - if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name_2, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - } - #if PY_MAJOR_VERSION >= 3 - { - PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) - if (!PyDict_GetItemString(modules, "csimdjson")) { - if (unlikely((PyDict_SetItemString(modules, "csimdjson", __pyx_m) < 0))) __PYX_ERR(0, 1, __pyx_L1_error) - } - } - #endif - /*--- Builtin init code ---*/ - if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - /*--- Constants init code ---*/ - if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - /*--- Global type/function init code ---*/ - (void)__Pyx_modinit_global_init_code(); - (void)__Pyx_modinit_variable_export_code(); - (void)__Pyx_modinit_function_export_code(); - if (unlikely((__Pyx_modinit_type_init_code() < 0))) __PYX_ERR(0, 1, __pyx_L1_error) - if (unlikely((__Pyx_modinit_type_import_code() < 0))) __PYX_ERR(0, 1, __pyx_L1_error) - (void)__Pyx_modinit_variable_import_code(); - (void)__Pyx_modinit_function_import_code(); - /*--- Execution code ---*/ - #if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) - if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) - #endif - - /* "View.MemoryView":99 - * - * cdef object __pyx_collections_abc_Sequence "__pyx_collections_abc_Sequence" - * try: # <<<<<<<<<<<<<< - * if __import__("sys").version_info >= (3, 3): - * __pyx_collections_abc_Sequence = __import__("collections.abc").abc.Sequence - */ - { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); - __Pyx_XGOTREF(__pyx_t_1); - __Pyx_XGOTREF(__pyx_t_2); - __Pyx_XGOTREF(__pyx_t_3); - /*try:*/ { - - /* "View.MemoryView":100 - * cdef object __pyx_collections_abc_Sequence "__pyx_collections_abc_Sequence" - * try: - * if __import__("sys").version_info >= (3, 3): # <<<<<<<<<<<<<< - * __pyx_collections_abc_Sequence = __import__("collections.abc").abc.Sequence - * else: - */ - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin___import__, __pyx_tuple__19, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 100, __pyx_L2_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_version_info); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 100, __pyx_L2_error) - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyObject_RichCompare(__pyx_t_5, __pyx_tuple__20, Py_GE); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 100, __pyx_L2_error) - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely((__pyx_t_6 < 0))) __PYX_ERR(1, 100, __pyx_L2_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (__pyx_t_6) { - - /* "View.MemoryView":101 - * try: - * if __import__("sys").version_info >= (3, 3): - * __pyx_collections_abc_Sequence = __import__("collections.abc").abc.Sequence # <<<<<<<<<<<<<< - * else: - * __pyx_collections_abc_Sequence = __import__("collections").Sequence - */ - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin___import__, __pyx_tuple__21, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 101, __pyx_L2_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_abc); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 101, __pyx_L2_error) - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_Sequence); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 101, __pyx_L2_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __Pyx_XGOTREF(__pyx_collections_abc_Sequence); - __Pyx_DECREF_SET(__pyx_collections_abc_Sequence, __pyx_t_4); - __Pyx_GIVEREF(__pyx_t_4); - __pyx_t_4 = 0; - - /* "View.MemoryView":100 - * cdef object __pyx_collections_abc_Sequence "__pyx_collections_abc_Sequence" - * try: - * if __import__("sys").version_info >= (3, 3): # <<<<<<<<<<<<<< - * __pyx_collections_abc_Sequence = __import__("collections.abc").abc.Sequence - * else: - */ - goto __pyx_L8; - } - - /* "View.MemoryView":103 - * __pyx_collections_abc_Sequence = __import__("collections.abc").abc.Sequence - * else: - * __pyx_collections_abc_Sequence = __import__("collections").Sequence # <<<<<<<<<<<<<< - * except: - * - */ - /*else*/ { - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin___import__, __pyx_tuple__22, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 103, __pyx_L2_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_Sequence); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 103, __pyx_L2_error) - __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_XGOTREF(__pyx_collections_abc_Sequence); - __Pyx_DECREF_SET(__pyx_collections_abc_Sequence, __pyx_t_5); - __Pyx_GIVEREF(__pyx_t_5); - __pyx_t_5 = 0; - } - __pyx_L8:; - - /* "View.MemoryView":99 - * - * cdef object __pyx_collections_abc_Sequence "__pyx_collections_abc_Sequence" - * try: # <<<<<<<<<<<<<< - * if __import__("sys").version_info >= (3, 3): - * __pyx_collections_abc_Sequence = __import__("collections.abc").abc.Sequence - */ - } - __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - goto __pyx_L7_try_end; - __pyx_L2_error:; - __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; - - /* "View.MemoryView":104 - * else: - * __pyx_collections_abc_Sequence = __import__("collections").Sequence - * except: # <<<<<<<<<<<<<< - * - * __pyx_collections_abc_Sequence = None - */ - /*except:*/ { - __Pyx_AddTraceback("View.MemoryView", __pyx_clineno, __pyx_lineno, __pyx_filename); - if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_4, &__pyx_t_7) < 0) __PYX_ERR(1, 104, __pyx_L4_except_error) - __Pyx_XGOTREF(__pyx_t_5); - __Pyx_XGOTREF(__pyx_t_4); - __Pyx_XGOTREF(__pyx_t_7); - - /* "View.MemoryView":106 - * except: - * - * __pyx_collections_abc_Sequence = None # <<<<<<<<<<<<<< - * - * - */ - __Pyx_INCREF(Py_None); - __Pyx_XGOTREF(__pyx_collections_abc_Sequence); - __Pyx_DECREF_SET(__pyx_collections_abc_Sequence, Py_None); - __Pyx_GIVEREF(Py_None); - __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; - __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; - goto __pyx_L3_exception_handled; - } - - /* "View.MemoryView":99 - * - * cdef object __pyx_collections_abc_Sequence "__pyx_collections_abc_Sequence" - * try: # <<<<<<<<<<<<<< - * if __import__("sys").version_info >= (3, 3): - * __pyx_collections_abc_Sequence = __import__("collections.abc").abc.Sequence - */ - __pyx_L4_except_error:; - __Pyx_XGIVEREF(__pyx_t_1); - __Pyx_XGIVEREF(__pyx_t_2); - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); - goto __pyx_L1_error; - __pyx_L3_exception_handled:; - __Pyx_XGIVEREF(__pyx_t_1); - __Pyx_XGIVEREF(__pyx_t_2); - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); - __pyx_L7_try_end:; - } - - /* "View.MemoryView":241 - * - * - * try: # <<<<<<<<<<<<<< - * count = __pyx_collections_abc_Sequence.count - * index = __pyx_collections_abc_Sequence.index - */ - { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __Pyx_ExceptionSave(&__pyx_t_3, &__pyx_t_2, &__pyx_t_1); - __Pyx_XGOTREF(__pyx_t_3); - __Pyx_XGOTREF(__pyx_t_2); - __Pyx_XGOTREF(__pyx_t_1); - /*try:*/ { - - /* "View.MemoryView":242 - * - * try: - * count = __pyx_collections_abc_Sequence.count # <<<<<<<<<<<<<< - * index = __pyx_collections_abc_Sequence.index - * except: - */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_collections_abc_Sequence, __pyx_n_s_count); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 242, __pyx_L11_error) - __Pyx_GOTREF(__pyx_t_7); - if (__Pyx_SetItemOnTypeDict(__pyx_array_type, __pyx_n_s_count, __pyx_t_7) < 0) __PYX_ERR(1, 242, __pyx_L11_error) - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - PyType_Modified(__pyx_array_type); - - /* "View.MemoryView":243 - * try: - * count = __pyx_collections_abc_Sequence.count - * index = __pyx_collections_abc_Sequence.index # <<<<<<<<<<<<<< - * except: - * pass - */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_collections_abc_Sequence, __pyx_n_s_index); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 243, __pyx_L11_error) - __Pyx_GOTREF(__pyx_t_7); - if (__Pyx_SetItemOnTypeDict(__pyx_array_type, __pyx_n_s_index, __pyx_t_7) < 0) __PYX_ERR(1, 243, __pyx_L11_error) - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - PyType_Modified(__pyx_array_type); - - /* "View.MemoryView":241 - * - * - * try: # <<<<<<<<<<<<<< - * count = __pyx_collections_abc_Sequence.count - * index = __pyx_collections_abc_Sequence.index - */ - } - __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - goto __pyx_L16_try_end; - __pyx_L11_error:; - __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; - __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; - - /* "View.MemoryView":244 - * count = __pyx_collections_abc_Sequence.count - * index = __pyx_collections_abc_Sequence.index - * except: # <<<<<<<<<<<<<< - * pass - * - */ - /*except:*/ { - __Pyx_ErrRestore(0,0,0); - goto __pyx_L12_exception_handled; - } - __pyx_L12_exception_handled:; - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_XGIVEREF(__pyx_t_2); - __Pyx_XGIVEREF(__pyx_t_1); - __Pyx_ExceptionReset(__pyx_t_3, __pyx_t_2, __pyx_t_1); - __pyx_L16_try_end:; - } - - /* "View.MemoryView":309 - * return self.name - * - * cdef generic = Enum("") # <<<<<<<<<<<<<< - * cdef strided = Enum("") # default - * cdef indirect = Enum("") - */ - __pyx_t_7 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), __pyx_tuple__23, NULL); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 309, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __Pyx_XGOTREF(generic); - __Pyx_DECREF_SET(generic, __pyx_t_7); - __Pyx_GIVEREF(__pyx_t_7); - __pyx_t_7 = 0; - - /* "View.MemoryView":310 - * - * cdef generic = Enum("") - * cdef strided = Enum("") # default # <<<<<<<<<<<<<< - * cdef indirect = Enum("") - * - */ - __pyx_t_7 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), __pyx_tuple__24, NULL); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 310, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __Pyx_XGOTREF(strided); - __Pyx_DECREF_SET(strided, __pyx_t_7); - __Pyx_GIVEREF(__pyx_t_7); - __pyx_t_7 = 0; - - /* "View.MemoryView":311 - * cdef generic = Enum("") - * cdef strided = Enum("") # default - * cdef indirect = Enum("") # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_7 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), __pyx_tuple__25, NULL); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 311, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __Pyx_XGOTREF(indirect); - __Pyx_DECREF_SET(indirect, __pyx_t_7); - __Pyx_GIVEREF(__pyx_t_7); - __pyx_t_7 = 0; - - /* "View.MemoryView":314 - * - * - * cdef contiguous = Enum("") # <<<<<<<<<<<<<< - * cdef indirect_contiguous = Enum("") - * - */ - __pyx_t_7 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), __pyx_tuple__26, NULL); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 314, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __Pyx_XGOTREF(contiguous); - __Pyx_DECREF_SET(contiguous, __pyx_t_7); - __Pyx_GIVEREF(__pyx_t_7); - __pyx_t_7 = 0; - - /* "View.MemoryView":315 - * - * cdef contiguous = Enum("") - * cdef indirect_contiguous = Enum("") # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_7 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), __pyx_tuple__27, NULL); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 315, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __Pyx_XGOTREF(indirect_contiguous); - __Pyx_DECREF_SET(indirect_contiguous, __pyx_t_7); - __Pyx_GIVEREF(__pyx_t_7); - __pyx_t_7 = 0; - - /* "View.MemoryView":323 - * - * - * cdef int __pyx_memoryview_thread_locks_used = 0 # <<<<<<<<<<<<<< - * cdef PyThread_type_lock[8] __pyx_memoryview_thread_locks = [ - * PyThread_allocate_lock(), - */ - __pyx_memoryview_thread_locks_used = 0; - - /* "View.MemoryView":324 - * - * cdef int __pyx_memoryview_thread_locks_used = 0 - * cdef PyThread_type_lock[8] __pyx_memoryview_thread_locks = [ # <<<<<<<<<<<<<< - * PyThread_allocate_lock(), - * PyThread_allocate_lock(), - */ - __pyx_t_8[0] = PyThread_allocate_lock(); - __pyx_t_8[1] = PyThread_allocate_lock(); - __pyx_t_8[2] = PyThread_allocate_lock(); - __pyx_t_8[3] = PyThread_allocate_lock(); - __pyx_t_8[4] = PyThread_allocate_lock(); - __pyx_t_8[5] = PyThread_allocate_lock(); - __pyx_t_8[6] = PyThread_allocate_lock(); - __pyx_t_8[7] = PyThread_allocate_lock(); - memcpy(&(__pyx_memoryview_thread_locks[0]), __pyx_t_8, sizeof(__pyx_memoryview_thread_locks[0]) * (8)); - - /* "View.MemoryView":982 - * - * - * try: # <<<<<<<<<<<<<< - * count = __pyx_collections_abc_Sequence.count - * index = __pyx_collections_abc_Sequence.index - */ - { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); - __Pyx_XGOTREF(__pyx_t_1); - __Pyx_XGOTREF(__pyx_t_2); - __Pyx_XGOTREF(__pyx_t_3); - /*try:*/ { - - /* "View.MemoryView":983 - * - * try: - * count = __pyx_collections_abc_Sequence.count # <<<<<<<<<<<<<< - * index = __pyx_collections_abc_Sequence.index - * except: - */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_collections_abc_Sequence, __pyx_n_s_count); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 983, __pyx_L17_error) - __Pyx_GOTREF(__pyx_t_7); - if (__Pyx_SetItemOnTypeDict(__pyx_memoryviewslice_type, __pyx_n_s_count, __pyx_t_7) < 0) __PYX_ERR(1, 983, __pyx_L17_error) - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - PyType_Modified(__pyx_memoryviewslice_type); - - /* "View.MemoryView":984 - * try: - * count = __pyx_collections_abc_Sequence.count - * index = __pyx_collections_abc_Sequence.index # <<<<<<<<<<<<<< - * except: - * pass - */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_collections_abc_Sequence, __pyx_n_s_index); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 984, __pyx_L17_error) - __Pyx_GOTREF(__pyx_t_7); - if (__Pyx_SetItemOnTypeDict(__pyx_memoryviewslice_type, __pyx_n_s_index, __pyx_t_7) < 0) __PYX_ERR(1, 984, __pyx_L17_error) - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - PyType_Modified(__pyx_memoryviewslice_type); - - /* "View.MemoryView":982 - * - * - * try: # <<<<<<<<<<<<<< - * count = __pyx_collections_abc_Sequence.count - * index = __pyx_collections_abc_Sequence.index - */ - } - __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - goto __pyx_L22_try_end; - __pyx_L17_error:; - __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; - __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; - - /* "View.MemoryView":985 - * count = __pyx_collections_abc_Sequence.count - * index = __pyx_collections_abc_Sequence.index - * except: # <<<<<<<<<<<<<< - * pass - * - */ - /*except:*/ { - __Pyx_ErrRestore(0,0,0); - goto __pyx_L18_exception_handled; - } - __pyx_L18_exception_handled:; - __Pyx_XGIVEREF(__pyx_t_1); - __Pyx_XGIVEREF(__pyx_t_2); - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); - __pyx_L22_try_end:; - } - - /* "View.MemoryView":988 - * pass - * - * try: # <<<<<<<<<<<<<< - * if __pyx_collections_abc_Sequence: - * - */ - { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __Pyx_ExceptionSave(&__pyx_t_3, &__pyx_t_2, &__pyx_t_1); - __Pyx_XGOTREF(__pyx_t_3); - __Pyx_XGOTREF(__pyx_t_2); - __Pyx_XGOTREF(__pyx_t_1); - /*try:*/ { - - /* "View.MemoryView":989 - * - * try: - * if __pyx_collections_abc_Sequence: # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_collections_abc_Sequence); if (unlikely((__pyx_t_6 < 0))) __PYX_ERR(1, 989, __pyx_L23_error) - if (__pyx_t_6) { - - /* "View.MemoryView":993 - * - * - * __pyx_collections_abc_Sequence.register(_memoryviewslice) # <<<<<<<<<<<<<< - * __pyx_collections_abc_Sequence.register(array) - * except: - */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_collections_abc_Sequence, __pyx_n_s_register); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 993, __pyx_L23_error) - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_4 = __Pyx_PyObject_CallOneArg(__pyx_t_7, ((PyObject *)__pyx_memoryviewslice_type)); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 993, __pyx_L23_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - - /* "View.MemoryView":994 - * - * __pyx_collections_abc_Sequence.register(_memoryviewslice) - * __pyx_collections_abc_Sequence.register(array) # <<<<<<<<<<<<<< - * except: - * pass # ignore failure, it's a minor issue - */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_collections_abc_Sequence, __pyx_n_s_register); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 994, __pyx_L23_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_7 = __Pyx_PyObject_CallOneArg(__pyx_t_4, ((PyObject *)__pyx_array_type)); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 994, __pyx_L23_error) - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - - /* "View.MemoryView":989 - * - * try: - * if __pyx_collections_abc_Sequence: # <<<<<<<<<<<<<< - * - * - */ - } - - /* "View.MemoryView":988 - * pass - * - * try: # <<<<<<<<<<<<<< - * if __pyx_collections_abc_Sequence: - * - */ - } - __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - goto __pyx_L28_try_end; - __pyx_L23_error:; - __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; - __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; - - /* "View.MemoryView":995 - * __pyx_collections_abc_Sequence.register(_memoryviewslice) - * __pyx_collections_abc_Sequence.register(array) - * except: # <<<<<<<<<<<<<< - * pass # ignore failure, it's a minor issue - * - */ - /*except:*/ { - __Pyx_ErrRestore(0,0,0); - goto __pyx_L24_exception_handled; - } - __pyx_L24_exception_handled:; - __Pyx_XGIVEREF(__pyx_t_3); - __Pyx_XGIVEREF(__pyx_t_2); - __Pyx_XGIVEREF(__pyx_t_1); - __Pyx_ExceptionReset(__pyx_t_3, __pyx_t_2, __pyx_t_1); - __pyx_L28_try_end:; - } - - /* "(tree fragment)":1 - * def __pyx_unpickle_Enum(__pyx_type, long __pyx_checksum, __pyx_state): # <<<<<<<<<<<<<< - * cdef object __pyx_PickleError - * cdef object __pyx_result - */ - __pyx_t_7 = PyCFunction_NewEx(&__pyx_mdef_15View_dot_MemoryView_1__pyx_unpickle_Enum, NULL, __pyx_n_s_View_MemoryView); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 1, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_pyx_unpickle_Enum, __pyx_t_7) < 0) __PYX_ERR(1, 1, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - - /* "csimdjson.pyx":3 - * # cython: language_level=3, c_string_type=unicode, c_string_encoding=utf8 - * # distutils: language=c++ - * import pathlib # <<<<<<<<<<<<<< - * - * from cython.operator cimport preincrement, dereference # noqa - */ - __pyx_t_7 = __Pyx_ImportDottedModule(__pyx_n_s_pathlib, NULL); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 3, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_pathlib, __pyx_t_7) < 0) __PYX_ERR(0, 3, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - - /* "csimdjson.pyx":16 - * from simdjson.csimdjson cimport * # noqa - * - * MAXSIZE_BYTES = SIMDJSON_MAXSIZE_BYTES # <<<<<<<<<<<<<< - * PADDING = SIMDJSON_PADDING - * VERSION = ( - */ - __pyx_t_7 = __Pyx_PyInt_FromSize_t(simdjson::SIMDJSON_MAXSIZE_BYTES); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 16, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_MAXSIZE_BYTES, __pyx_t_7) < 0) __PYX_ERR(0, 16, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - - /* "csimdjson.pyx":17 - * - * MAXSIZE_BYTES = SIMDJSON_MAXSIZE_BYTES - * PADDING = SIMDJSON_PADDING # <<<<<<<<<<<<<< - * VERSION = ( - * f'{SIMDJSON_VERSION_MAJOR}.' - */ - __pyx_t_7 = __Pyx_PyInt_FromSize_t(simdjson::SIMDJSON_PADDING); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 17, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_PADDING, __pyx_t_7) < 0) __PYX_ERR(0, 17, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - - /* "csimdjson.pyx":19 - * PADDING = SIMDJSON_PADDING - * VERSION = ( - * f'{SIMDJSON_VERSION_MAJOR}.' # <<<<<<<<<<<<<< - * f'{SIMDJSON_VERSION_MINOR}.' - * f'{SIMDJSON_VERSION_REVISION}' - */ - __pyx_t_7 = PyTuple_New(5); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 19, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_9 = 0; - __pyx_t_10 = 127; - __pyx_t_4 = __Pyx_PyUnicode_From___pyx_anon_enum(simdjson::SIMDJSON_VERSION_MAJOR, 0, ' ', 'd'); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 19, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_9 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_4); - __Pyx_GIVEREF(__pyx_t_4); - PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_4); - __pyx_t_4 = 0; - __Pyx_INCREF(__pyx_kp_u__2); - __pyx_t_9 += 1; - __Pyx_GIVEREF(__pyx_kp_u__2); - PyTuple_SET_ITEM(__pyx_t_7, 1, __pyx_kp_u__2); - - /* "csimdjson.pyx":20 - * VERSION = ( - * f'{SIMDJSON_VERSION_MAJOR}.' - * f'{SIMDJSON_VERSION_MINOR}.' # <<<<<<<<<<<<<< - * f'{SIMDJSON_VERSION_REVISION}' - * ) - */ - __pyx_t_4 = __Pyx_PyUnicode_From___pyx_anon_enum(simdjson::SIMDJSON_VERSION_MINOR, 0, ' ', 'd'); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 20, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_9 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_4); - __Pyx_GIVEREF(__pyx_t_4); - PyTuple_SET_ITEM(__pyx_t_7, 2, __pyx_t_4); - __pyx_t_4 = 0; - __Pyx_INCREF(__pyx_kp_u__2); - __pyx_t_9 += 1; - __Pyx_GIVEREF(__pyx_kp_u__2); - PyTuple_SET_ITEM(__pyx_t_7, 3, __pyx_kp_u__2); - - /* "csimdjson.pyx":21 - * f'{SIMDJSON_VERSION_MAJOR}.' - * f'{SIMDJSON_VERSION_MINOR}.' - * f'{SIMDJSON_VERSION_REVISION}' # <<<<<<<<<<<<<< - * ) - * - */ - __pyx_t_4 = __Pyx_PyUnicode_From___pyx_anon_enum(simdjson::SIMDJSON_VERSION_REVISION, 0, ' ', 'd'); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 21, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_9 += __Pyx_PyUnicode_GET_LENGTH(__pyx_t_4); - __Pyx_GIVEREF(__pyx_t_4); - PyTuple_SET_ITEM(__pyx_t_7, 4, __pyx_t_4); - __pyx_t_4 = 0; - - /* "csimdjson.pyx":19 - * PADDING = SIMDJSON_PADDING - * VERSION = ( - * f'{SIMDJSON_VERSION_MAJOR}.' # <<<<<<<<<<<<<< - * f'{SIMDJSON_VERSION_MINOR}.' - * f'{SIMDJSON_VERSION_REVISION}' - */ - __pyx_t_4 = __Pyx_PyUnicode_Join(__pyx_t_7, 5, __pyx_t_9, __pyx_t_10); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 19, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_VERSION, __pyx_t_4) < 0) __PYX_ERR(0, 18, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - - /* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_9csimdjson_11ArrayBuffer_9__reduce_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_ArrayBuffer___reduce_cython, NULL, __pyx_n_s_csimdjson, __pyx_d, ((PyObject *)__pyx_codeobj__31)); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 1, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_reduce_cython, __pyx_t_4) < 0) __PYX_ERR(1, 1, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_9csimdjson_11ArrayBuffer_11__setstate_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_ArrayBuffer___setstate_cython, NULL, __pyx_n_s_csimdjson, __pyx_d, ((PyObject *)__pyx_codeobj__33)); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 3, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_setstate_cython, __pyx_t_4) < 0) __PYX_ERR(1, 3, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - - /* "csimdjson.pyx":228 - * preincrement(it) - * - * def at_pointer(self, json_pointer): # <<<<<<<<<<<<<< - * """Get the value at the given JSON pointer.""" - * return element_to_primitive( - */ - __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_9csimdjson_5Array_8at_pointer, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Array_at_pointer, NULL, __pyx_n_s_csimdjson, __pyx_d, ((PyObject *)__pyx_codeobj__35)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 228, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_9csimdjson_Array, __pyx_n_s_at_pointer, __pyx_t_4) < 0) __PYX_ERR(0, 228, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - PyType_Modified(__pyx_ptype_9csimdjson_Array); - - /* "csimdjson.pyx":237 - * ) - * - * def as_list(self): # <<<<<<<<<<<<<< - * """ - * Convert this Array to a regular python list, recursively - */ - __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_9csimdjson_5Array_10as_list, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Array_as_list, NULL, __pyx_n_s_csimdjson, __pyx_d, ((PyObject *)__pyx_codeobj__36)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 237, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_9csimdjson_Array, __pyx_n_s_as_list, __pyx_t_4) < 0) __PYX_ERR(0, 237, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - PyType_Modified(__pyx_ptype_9csimdjson_Array); - - /* "csimdjson.pyx":244 - * return array_to_list(self.parser, self.c_element, True) - * - * def as_buffer(self, *, of_type): # <<<<<<<<<<<<<< - * """ - * **Copies** the contents of a **homogeneous** array to an - */ - __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_9csimdjson_5Array_12as_buffer, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Array_as_buffer, NULL, __pyx_n_s_csimdjson, __pyx_d, ((PyObject *)__pyx_codeobj__38)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 244, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_9csimdjson_Array, __pyx_n_s_as_buffer, __pyx_t_4) < 0) __PYX_ERR(0, 244, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - PyType_Modified(__pyx_ptype_9csimdjson_Array); - - /* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - * def __setstate_cython__(self, __pyx_state): - */ - __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_9csimdjson_5Array_14__reduce_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Array___reduce_cython, NULL, __pyx_n_s_csimdjson, __pyx_d, ((PyObject *)__pyx_codeobj__39)); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 1, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_reduce_cython, __pyx_t_4) < 0) __PYX_ERR(1, 1, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - */ - __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_9csimdjson_5Array_16__setstate_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Array___setstate_cython, NULL, __pyx_n_s_csimdjson, __pyx_d, ((PyObject *)__pyx_codeobj__40)); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 3, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_setstate_cython, __pyx_t_4) < 0) __PYX_ERR(1, 3, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - - /* "csimdjson.pyx":301 - * ) - * - * def get(self, key, default=None): # <<<<<<<<<<<<<< - * """ - * Return the value of `key`, or `default` if the key does - */ - __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_9csimdjson_6Object_3get, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Object_get, NULL, __pyx_n_s_csimdjson, __pyx_d, ((PyObject *)__pyx_codeobj__42)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 301, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_4, __pyx_tuple__43); - if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_9csimdjson_Object, __pyx_n_s_get, __pyx_t_4) < 0) __PYX_ERR(0, 301, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - PyType_Modified(__pyx_ptype_9csimdjson_Object); - - /* "csimdjson.pyx":336 - * preincrement(it) - * - * keys = __iter__ # <<<<<<<<<<<<<< - * - * def values(self): - */ - __Pyx_GetNameInClass(__pyx_t_4, (PyObject *)__pyx_ptype_9csimdjson_Object, __pyx_n_s_iter); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 336, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_9csimdjson_Object, __pyx_n_s_keys, __pyx_t_4) < 0) __PYX_ERR(0, 336, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - PyType_Modified(__pyx_ptype_9csimdjson_Object); - - /* "csimdjson.pyx":338 - * keys = __iter__ - * - * def values(self): # <<<<<<<<<<<<<< - * """ - * Returns an iterator over of all values in this `Object`. - */ - __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_9csimdjson_6Object_12values, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Object_values, NULL, __pyx_n_s_csimdjson, __pyx_d, ((PyObject *)__pyx_codeobj__11)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 338, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_9csimdjson_Object, __pyx_n_s_values, __pyx_t_4) < 0) __PYX_ERR(0, 338, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - PyType_Modified(__pyx_ptype_9csimdjson_Object); - - /* "csimdjson.pyx":347 - * preincrement(it) - * - * def items(self): # <<<<<<<<<<<<<< - * """ - * Returns an iterator over all the (key, value) pairs in this - */ - __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_9csimdjson_6Object_15items, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Object_items, NULL, __pyx_n_s_csimdjson, __pyx_d, ((PyObject *)__pyx_codeobj__12)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 347, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_9csimdjson_Object, __pyx_n_s_items, __pyx_t_4) < 0) __PYX_ERR(0, 347, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - PyType_Modified(__pyx_ptype_9csimdjson_Object); - - /* "csimdjson.pyx":366 - * preincrement(it) - * - * def at_pointer(self, json_pointer): # <<<<<<<<<<<<<< - * """Get the value at the given JSON pointer.""" - * return element_to_primitive( - */ - __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_9csimdjson_6Object_18at_pointer, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Object_at_pointer, NULL, __pyx_n_s_csimdjson, __pyx_d, ((PyObject *)__pyx_codeobj__46)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 366, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_9csimdjson_Object, __pyx_n_s_at_pointer, __pyx_t_4) < 0) __PYX_ERR(0, 366, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - PyType_Modified(__pyx_ptype_9csimdjson_Object); - - /* "csimdjson.pyx":375 - * ) - * - * def as_dict(self): # <<<<<<<<<<<<<< - * """ - * Convert this `Object` to a regular python dictionary, - */ - __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_9csimdjson_6Object_20as_dict, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Object_as_dict, NULL, __pyx_n_s_csimdjson, __pyx_d, ((PyObject *)__pyx_codeobj__47)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 375, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_9csimdjson_Object, __pyx_n_s_as_dict, __pyx_t_4) < 0) __PYX_ERR(0, 375, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - PyType_Modified(__pyx_ptype_9csimdjson_Object); - - /* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - * def __setstate_cython__(self, __pyx_state): - */ - __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_9csimdjson_6Object_22__reduce_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Object___reduce_cython, NULL, __pyx_n_s_csimdjson, __pyx_d, ((PyObject *)__pyx_codeobj__48)); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 1, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_reduce_cython, __pyx_t_4) < 0) __PYX_ERR(1, 1, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "self.c_element,self.c_parser cannot be converted to a Python object for pickling" - */ - __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_9csimdjson_6Object_24__setstate_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Object___setstate_cython, NULL, __pyx_n_s_csimdjson, __pyx_d, ((PyObject *)__pyx_codeobj__49)); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 3, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_setstate_cython, __pyx_t_4) < 0) __PYX_ERR(1, 3, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - - /* "csimdjson.pyx":404 - * cdef shared_ptr[simd_parser] c_parser - * - * def __cinit__(self, size_t max_capacity=SIMDJSON_MAXSIZE_BYTES): # <<<<<<<<<<<<<< - * self.c_parser = make_shared[simd_parser](max_capacity) - * - */ - __pyx_k__13 = simdjson::SIMDJSON_MAXSIZE_BYTES; - - /* "csimdjson.pyx":410 - * self.c_parser.reset() - * - * def parse(self, src not None, bint recursive=False): # <<<<<<<<<<<<<< - * """Parse the given JSON document. - * - */ - __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_9csimdjson_6Parser_5parse, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Parser_parse, NULL, __pyx_n_s_csimdjson, __pyx_d, ((PyObject *)__pyx_codeobj__51)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 410, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_4, __pyx_tuple__52); - if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_9csimdjson_Parser, __pyx_n_s_parse, __pyx_t_4) < 0) __PYX_ERR(0, 410, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - PyType_Modified(__pyx_ptype_9csimdjson_Parser); - - /* "csimdjson.pyx":495 - * ) - * - * def load(self, path, bint recursive=False): # <<<<<<<<<<<<<< - * """Load a JSON document from the file system path `path`. - * - */ - __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_9csimdjson_6Parser_7load, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Parser_load, NULL, __pyx_n_s_csimdjson, __pyx_d, ((PyObject *)__pyx_codeobj__54)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 495, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_4, __pyx_tuple__52); - if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_9csimdjson_Parser, __pyx_n_s_load, __pyx_t_4) < 0) __PYX_ERR(0, 495, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - PyType_Modified(__pyx_ptype_9csimdjson_Parser); - - /* "csimdjson.pyx":521 - * return element_to_primitive(self, document, recursive) - * - * def get_implementations(self, supported_by_runtime=True): # <<<<<<<<<<<<<< - * """ - * A list of available parser implementations in the form of [(name, - */ - __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_9csimdjson_6Parser_9get_implementations, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Parser_get_implementations, NULL, __pyx_n_s_csimdjson, __pyx_d, ((PyObject *)__pyx_codeobj__16)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 521, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_4, __pyx_tuple__56); - if (__Pyx_SetItemOnTypeDict((PyObject *)__pyx_ptype_9csimdjson_Parser, __pyx_n_s_get_implementations, __pyx_t_4) < 0) __PYX_ERR(0, 521, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - PyType_Modified(__pyx_ptype_9csimdjson_Parser); - - /* "(tree fragment)":1 - * def __reduce_cython__(self): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): - */ - __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_9csimdjson_6Parser_12__reduce_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Parser___reduce_cython, NULL, __pyx_n_s_csimdjson, __pyx_d, ((PyObject *)__pyx_codeobj__57)); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 1, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_reduce_cython, __pyx_t_4) < 0) __PYX_ERR(1, 1, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - - /* "(tree fragment)":3 - * def __reduce_cython__(self): - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - * def __setstate_cython__(self, __pyx_state): # <<<<<<<<<<<<<< - * raise TypeError, "no default __reduce__ due to non-trivial __cinit__" - */ - __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_9csimdjson_6Parser_14__setstate_cython__, __Pyx_CYFUNCTION_CCLASS, __pyx_n_s_Parser___setstate_cython, NULL, __pyx_n_s_csimdjson, __pyx_d, ((PyObject *)__pyx_codeobj__58)); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 3, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_setstate_cython, __pyx_t_4) < 0) __PYX_ERR(1, 3, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - - /* "csimdjson.pyx":1 - * # cython: language_level=3, c_string_type=unicode, c_string_encoding=utf8 # <<<<<<<<<<<<<< - * # distutils: language=c++ - * import pathlib - */ - __pyx_t_4 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 1, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_4) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - - /*--- Wrapped vars code ---*/ - - goto __pyx_L0; - __pyx_L1_error:; - __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_5); - __Pyx_XDECREF(__pyx_t_7); - if (__pyx_m) { - if (__pyx_d && stringtab_initialized) { - __Pyx_AddTraceback("init csimdjson", __pyx_clineno, __pyx_lineno, __pyx_filename); - } - #if !CYTHON_USE_MODULE_STATE - Py_CLEAR(__pyx_m); - #else - Py_DECREF(__pyx_m); - if (pystate_addmodule_run) { - PyObject *tp, *value, *tb; - PyErr_Fetch(&tp, &value, &tb); - PyState_RemoveModule(&__pyx_moduledef); - PyErr_Restore(tp, value, tb); - } - #endif - } else if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ImportError, "init csimdjson"); - } - __pyx_L0:; - __Pyx_RefNannyFinishContext(); - #if CYTHON_PEP489_MULTI_PHASE_INIT - return (__pyx_m != NULL) ? 0 : -1; - #elif PY_MAJOR_VERSION >= 3 - return __pyx_m; - #else - return; - #endif -} -/* #### Code section: cleanup_globals ### */ -/* #### Code section: cleanup_module ### */ -/* #### Code section: main_method ### */ -/* #### Code section: utility_code_pragmas ### */ -#ifdef _MSC_VER -#pragma warning( push ) -/* Warning 4127: conditional expression is constant - * Cython uses constant conditional expressions to allow in inline functions to be optimized at - * compile-time, so this warning is not useful - */ -#pragma warning( disable : 4127 ) -#endif - - - -/* #### Code section: utility_code_def ### */ - -/* --- Runtime support code --- */ -/* Refnanny */ -#if CYTHON_REFNANNY -static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { - PyObject *m = NULL, *p = NULL; - void *r = NULL; - m = PyImport_ImportModule(modname); - if (!m) goto end; - p = PyObject_GetAttrString(m, "RefNannyAPI"); - if (!p) goto end; - r = PyLong_AsVoidPtr(p); -end: - Py_XDECREF(p); - Py_XDECREF(m); - return (__Pyx_RefNannyAPIStruct *)r; -} -#endif - -/* PyErrExceptionMatches */ -#if CYTHON_FAST_THREAD_STATE -static int __Pyx_PyErr_ExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { - Py_ssize_t i, n; - n = PyTuple_GET_SIZE(tuple); -#if PY_MAJOR_VERSION >= 3 - for (i=0; i= 0x030C00A6 - PyObject *current_exception = tstate->current_exception; - if (unlikely(!current_exception)) return 0; - exc_type = (PyObject*) Py_TYPE(current_exception); - if (exc_type == err) return 1; -#else - exc_type = tstate->curexc_type; - if (exc_type == err) return 1; - if (unlikely(!exc_type)) return 0; -#endif - #if CYTHON_AVOID_BORROWED_REFS - Py_INCREF(exc_type); - #endif - if (unlikely(PyTuple_Check(err))) { - result = __Pyx_PyErr_ExceptionMatchesTuple(exc_type, err); - } else { - result = __Pyx_PyErr_GivenExceptionMatches(exc_type, err); - } - #if CYTHON_AVOID_BORROWED_REFS - Py_DECREF(exc_type); - #endif - return result; -} -#endif - -/* PyErrFetchRestore */ -#if CYTHON_FAST_THREAD_STATE -static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { -#if PY_VERSION_HEX >= 0x030C00A6 - PyObject *tmp_value; - assert(type == NULL || (value != NULL && type == (PyObject*) Py_TYPE(value))); - if (value) { - #if CYTHON_COMPILING_IN_CPYTHON - if (unlikely(((PyBaseExceptionObject*) value)->traceback != tb)) - #endif - PyException_SetTraceback(value, tb); - } - tmp_value = tstate->current_exception; - tstate->current_exception = value; - Py_XDECREF(tmp_value); -#else - PyObject *tmp_type, *tmp_value, *tmp_tb; - tmp_type = tstate->curexc_type; - tmp_value = tstate->curexc_value; - tmp_tb = tstate->curexc_traceback; - tstate->curexc_type = type; - tstate->curexc_value = value; - tstate->curexc_traceback = tb; - Py_XDECREF(tmp_type); - Py_XDECREF(tmp_value); - Py_XDECREF(tmp_tb); -#endif -} -static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { -#if PY_VERSION_HEX >= 0x030C00A6 - PyObject* exc_value; - exc_value = tstate->current_exception; - tstate->current_exception = 0; - *value = exc_value; - *type = NULL; - *tb = NULL; - if (exc_value) { - *type = (PyObject*) Py_TYPE(exc_value); - Py_INCREF(*type); - #if CYTHON_COMPILING_IN_CPYTHON - *tb = ((PyBaseExceptionObject*) exc_value)->traceback; - Py_XINCREF(*tb); - #else - *tb = PyException_GetTraceback(exc_value); - #endif - } -#else - *type = tstate->curexc_type; - *value = tstate->curexc_value; - *tb = tstate->curexc_traceback; - tstate->curexc_type = 0; - tstate->curexc_value = 0; - tstate->curexc_traceback = 0; -#endif -} -#endif - -/* PyObjectGetAttrStr */ -#if CYTHON_USE_TYPE_SLOTS -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { - PyTypeObject* tp = Py_TYPE(obj); - if (likely(tp->tp_getattro)) - return tp->tp_getattro(obj, attr_name); -#if PY_MAJOR_VERSION < 3 - if (likely(tp->tp_getattr)) - return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); -#endif - return PyObject_GetAttr(obj, attr_name); -} -#endif - -/* PyObjectGetAttrStrNoError */ -static void __Pyx_PyObject_GetAttrStr_ClearAttributeError(void) { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - if (likely(__Pyx_PyErr_ExceptionMatches(PyExc_AttributeError))) - __Pyx_PyErr_Clear(); -} -static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name) { - PyObject *result; -#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_TYPE_SLOTS && PY_VERSION_HEX >= 0x030700B1 - PyTypeObject* tp = Py_TYPE(obj); - if (likely(tp->tp_getattro == PyObject_GenericGetAttr)) { - return _PyObject_GenericGetAttrWithDict(obj, attr_name, NULL, 1); - } -#endif - result = __Pyx_PyObject_GetAttrStr(obj, attr_name); - if (unlikely(!result)) { - __Pyx_PyObject_GetAttrStr_ClearAttributeError(); - } - return result; -} - -/* GetBuiltinName */ -static PyObject *__Pyx_GetBuiltinName(PyObject *name) { - PyObject* result = __Pyx_PyObject_GetAttrStrNoError(__pyx_b, name); - if (unlikely(!result) && !PyErr_Occurred()) { - PyErr_Format(PyExc_NameError, -#if PY_MAJOR_VERSION >= 3 - "name '%U' is not defined", name); -#else - "name '%.200s' is not defined", PyString_AS_STRING(name)); -#endif - } - return result; -} - -/* TupleAndListFromArray */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE void __Pyx_copy_object_array(PyObject *const *CYTHON_RESTRICT src, PyObject** CYTHON_RESTRICT dest, Py_ssize_t length) { - PyObject *v; - Py_ssize_t i; - for (i = 0; i < length; i++) { - v = dest[i] = src[i]; - Py_INCREF(v); - } -} -static CYTHON_INLINE PyObject * -__Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n) -{ - PyObject *res; - if (n <= 0) { - Py_INCREF(__pyx_empty_tuple); - return __pyx_empty_tuple; - } - res = PyTuple_New(n); - if (unlikely(res == NULL)) return NULL; - __Pyx_copy_object_array(src, ((PyTupleObject*)res)->ob_item, n); - return res; -} -static CYTHON_INLINE PyObject * -__Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n) -{ - PyObject *res; - if (n <= 0) { - return PyList_New(0); - } - res = PyList_New(n); - if (unlikely(res == NULL)) return NULL; - __Pyx_copy_object_array(src, ((PyListObject*)res)->ob_item, n); - return res; -} -#endif - -/* BytesEquals */ -static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) { -#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API - return PyObject_RichCompareBool(s1, s2, equals); -#else - if (s1 == s2) { - return (equals == Py_EQ); - } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) { - const char *ps1, *ps2; - Py_ssize_t length = PyBytes_GET_SIZE(s1); - if (length != PyBytes_GET_SIZE(s2)) - return (equals == Py_NE); - ps1 = PyBytes_AS_STRING(s1); - ps2 = PyBytes_AS_STRING(s2); - if (ps1[0] != ps2[0]) { - return (equals == Py_NE); - } else if (length == 1) { - return (equals == Py_EQ); - } else { - int result; -#if CYTHON_USE_UNICODE_INTERNALS && (PY_VERSION_HEX < 0x030B0000) - Py_hash_t hash1, hash2; - hash1 = ((PyBytesObject*)s1)->ob_shash; - hash2 = ((PyBytesObject*)s2)->ob_shash; - if (hash1 != hash2 && hash1 != -1 && hash2 != -1) { - return (equals == Py_NE); - } -#endif - result = memcmp(ps1, ps2, (size_t)length); - return (equals == Py_EQ) ? (result == 0) : (result != 0); - } - } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) { - return (equals == Py_NE); - } else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) { - return (equals == Py_NE); - } else { - int result; - PyObject* py_result = PyObject_RichCompare(s1, s2, equals); - if (!py_result) - return -1; - result = __Pyx_PyObject_IsTrue(py_result); - Py_DECREF(py_result); - return result; - } -#endif -} - -/* UnicodeEquals */ -static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) { -#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API - return PyObject_RichCompareBool(s1, s2, equals); -#else -#if PY_MAJOR_VERSION < 3 - PyObject* owned_ref = NULL; -#endif - int s1_is_unicode, s2_is_unicode; - if (s1 == s2) { - goto return_eq; - } - s1_is_unicode = PyUnicode_CheckExact(s1); - s2_is_unicode = PyUnicode_CheckExact(s2); -#if PY_MAJOR_VERSION < 3 - if ((s1_is_unicode & (!s2_is_unicode)) && PyString_CheckExact(s2)) { - owned_ref = PyUnicode_FromObject(s2); - if (unlikely(!owned_ref)) - return -1; - s2 = owned_ref; - s2_is_unicode = 1; - } else if ((s2_is_unicode & (!s1_is_unicode)) && PyString_CheckExact(s1)) { - owned_ref = PyUnicode_FromObject(s1); - if (unlikely(!owned_ref)) - return -1; - s1 = owned_ref; - s1_is_unicode = 1; - } else if (((!s2_is_unicode) & (!s1_is_unicode))) { - return __Pyx_PyBytes_Equals(s1, s2, equals); - } -#endif - if (s1_is_unicode & s2_is_unicode) { - Py_ssize_t length; - int kind; - void *data1, *data2; - if (unlikely(__Pyx_PyUnicode_READY(s1) < 0) || unlikely(__Pyx_PyUnicode_READY(s2) < 0)) - return -1; - length = __Pyx_PyUnicode_GET_LENGTH(s1); - if (length != __Pyx_PyUnicode_GET_LENGTH(s2)) { - goto return_ne; - } -#if CYTHON_USE_UNICODE_INTERNALS - { - Py_hash_t hash1, hash2; - #if CYTHON_PEP393_ENABLED - hash1 = ((PyASCIIObject*)s1)->hash; - hash2 = ((PyASCIIObject*)s2)->hash; - #else - hash1 = ((PyUnicodeObject*)s1)->hash; - hash2 = ((PyUnicodeObject*)s2)->hash; - #endif - if (hash1 != hash2 && hash1 != -1 && hash2 != -1) { - goto return_ne; - } - } -#endif - kind = __Pyx_PyUnicode_KIND(s1); - if (kind != __Pyx_PyUnicode_KIND(s2)) { - goto return_ne; - } - data1 = __Pyx_PyUnicode_DATA(s1); - data2 = __Pyx_PyUnicode_DATA(s2); - if (__Pyx_PyUnicode_READ(kind, data1, 0) != __Pyx_PyUnicode_READ(kind, data2, 0)) { - goto return_ne; - } else if (length == 1) { - goto return_eq; - } else { - int result = memcmp(data1, data2, (size_t)(length * kind)); - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(owned_ref); - #endif - return (equals == Py_EQ) ? (result == 0) : (result != 0); - } - } else if ((s1 == Py_None) & s2_is_unicode) { - goto return_ne; - } else if ((s2 == Py_None) & s1_is_unicode) { - goto return_ne; - } else { - int result; - PyObject* py_result = PyObject_RichCompare(s1, s2, equals); - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(owned_ref); - #endif - if (!py_result) - return -1; - result = __Pyx_PyObject_IsTrue(py_result); - Py_DECREF(py_result); - return result; - } -return_eq: - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(owned_ref); - #endif - return (equals == Py_EQ); -return_ne: - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(owned_ref); - #endif - return (equals == Py_NE); -#endif -} - -/* fastcall */ -#if CYTHON_METH_FASTCALL -static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s) -{ - Py_ssize_t i, n = PyTuple_GET_SIZE(kwnames); - for (i = 0; i < n; i++) - { - if (s == PyTuple_GET_ITEM(kwnames, i)) return kwvalues[i]; - } - for (i = 0; i < n; i++) - { - int eq = __Pyx_PyUnicode_Equals(s, PyTuple_GET_ITEM(kwnames, i), Py_EQ); - if (unlikely(eq != 0)) { - if (unlikely(eq < 0)) return NULL; // error - return kwvalues[i]; - } - } - return NULL; // not found (no exception set) -} -#endif - -/* RaiseArgTupleInvalid */ -static void __Pyx_RaiseArgtupleInvalid( - const char* func_name, - int exact, - Py_ssize_t num_min, - Py_ssize_t num_max, - Py_ssize_t num_found) -{ - Py_ssize_t num_expected; - const char *more_or_less; - if (num_found < num_min) { - num_expected = num_min; - more_or_less = "at least"; - } else { - num_expected = num_max; - more_or_less = "at most"; - } - if (exact) { - more_or_less = "exactly"; - } - PyErr_Format(PyExc_TypeError, - "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)", - func_name, more_or_less, num_expected, - (num_expected == 1) ? "" : "s", num_found); -} - -/* RaiseDoubleKeywords */ -static void __Pyx_RaiseDoubleKeywordsError( - const char* func_name, - PyObject* kw_name) -{ - PyErr_Format(PyExc_TypeError, - #if PY_MAJOR_VERSION >= 3 - "%s() got multiple values for keyword argument '%U'", func_name, kw_name); - #else - "%s() got multiple values for keyword argument '%s'", func_name, - PyString_AsString(kw_name)); - #endif -} - -/* ParseKeywords */ -static int __Pyx_ParseOptionalKeywords( - PyObject *kwds, - PyObject *const *kwvalues, - PyObject **argnames[], - PyObject *kwds2, - PyObject *values[], - Py_ssize_t num_pos_args, - const char* function_name) -{ - PyObject *key = 0, *value = 0; - Py_ssize_t pos = 0; - PyObject*** name; - PyObject*** first_kw_arg = argnames + num_pos_args; - int kwds_is_tuple = CYTHON_METH_FASTCALL && likely(PyTuple_Check(kwds)); - while (1) { - Py_XDECREF(key); key = NULL; - Py_XDECREF(value); value = NULL; - if (kwds_is_tuple) { - Py_ssize_t size; -#if CYTHON_ASSUME_SAFE_MACROS - size = PyTuple_GET_SIZE(kwds); -#else - size = PyTuple_Size(kwds); - if (size < 0) goto bad; -#endif - if (pos >= size) break; -#if CYTHON_AVOID_BORROWED_REFS - key = __Pyx_PySequence_ITEM(kwds, pos); - if (!key) goto bad; -#elif CYTHON_ASSUME_SAFE_MACROS - key = PyTuple_GET_ITEM(kwds, pos); -#else - key = PyTuple_GetItem(kwds, pos); - if (!key) goto bad; -#endif - value = kwvalues[pos]; - pos++; - } - else - { - if (!PyDict_Next(kwds, &pos, &key, &value)) break; -#if CYTHON_AVOID_BORROWED_REFS - Py_INCREF(key); -#endif - } - name = first_kw_arg; - while (*name && (**name != key)) name++; - if (*name) { - values[name-argnames] = value; -#if CYTHON_AVOID_BORROWED_REFS - Py_INCREF(value); // transfer ownership of value to values - Py_DECREF(key); -#endif - key = NULL; - value = NULL; - continue; - } -#if !CYTHON_AVOID_BORROWED_REFS - Py_INCREF(key); -#endif - Py_INCREF(value); - name = first_kw_arg; - #if PY_MAJOR_VERSION < 3 - if (likely(PyString_Check(key))) { - while (*name) { - if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) - && _PyString_Eq(**name, key)) { - values[name-argnames] = value; -#if CYTHON_AVOID_BORROWED_REFS - value = NULL; // ownership transferred to values -#endif - break; - } - name++; - } - if (*name) continue; - else { - PyObject*** argname = argnames; - while (argname != first_kw_arg) { - if ((**argname == key) || ( - (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) - && _PyString_Eq(**argname, key))) { - goto arg_passed_twice; - } - argname++; - } - } - } else - #endif - if (likely(PyUnicode_Check(key))) { - while (*name) { - int cmp = ( - #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 - (__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 : - #endif - PyUnicode_Compare(**name, key) - ); - if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; - if (cmp == 0) { - values[name-argnames] = value; -#if CYTHON_AVOID_BORROWED_REFS - value = NULL; // ownership transferred to values -#endif - break; - } - name++; - } - if (*name) continue; - else { - PyObject*** argname = argnames; - while (argname != first_kw_arg) { - int cmp = (**argname == key) ? 0 : - #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 - (__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 : - #endif - PyUnicode_Compare(**argname, key); - if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; - if (cmp == 0) goto arg_passed_twice; - argname++; - } - } - } else - goto invalid_keyword_type; - if (kwds2) { - if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; - } else { - goto invalid_keyword; - } - } - Py_XDECREF(key); - Py_XDECREF(value); - return 0; -arg_passed_twice: - __Pyx_RaiseDoubleKeywordsError(function_name, key); - goto bad; -invalid_keyword_type: - PyErr_Format(PyExc_TypeError, - "%.200s() keywords must be strings", function_name); - goto bad; -invalid_keyword: - #if PY_MAJOR_VERSION < 3 - PyErr_Format(PyExc_TypeError, - "%.200s() got an unexpected keyword argument '%.200s'", - function_name, PyString_AsString(key)); - #else - PyErr_Format(PyExc_TypeError, - "%s() got an unexpected keyword argument '%U'", - function_name, key); - #endif -bad: - Py_XDECREF(key); - Py_XDECREF(value); - return -1; -} - -/* ArgTypeTest */ -static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact) -{ - __Pyx_TypeName type_name; - __Pyx_TypeName obj_type_name; - if (unlikely(!type)) { - PyErr_SetString(PyExc_SystemError, "Missing type object"); - return 0; - } - else if (exact) { - #if PY_MAJOR_VERSION == 2 - if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; - #endif - } - else { - if (likely(__Pyx_TypeCheck(obj, type))) return 1; - } - type_name = __Pyx_PyType_GetName(type); - obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); - PyErr_Format(PyExc_TypeError, - "Argument '%.200s' has incorrect type (expected " __Pyx_FMT_TYPENAME - ", got " __Pyx_FMT_TYPENAME ")", name, type_name, obj_type_name); - __Pyx_DECREF_TypeName(type_name); - __Pyx_DECREF_TypeName(obj_type_name); - return 0; -} - -/* RaiseException */ -#if PY_MAJOR_VERSION < 3 -static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) { - __Pyx_PyThreadState_declare - CYTHON_UNUSED_VAR(cause); - Py_XINCREF(type); - if (!value || value == Py_None) - value = NULL; - else - Py_INCREF(value); - if (!tb || tb == Py_None) - tb = NULL; - else { - Py_INCREF(tb); - if (!PyTraceBack_Check(tb)) { - PyErr_SetString(PyExc_TypeError, - "raise: arg 3 must be a traceback or None"); - goto raise_error; - } - } - if (PyType_Check(type)) { -#if CYTHON_COMPILING_IN_PYPY - if (!value) { - Py_INCREF(Py_None); - value = Py_None; - } -#endif - PyErr_NormalizeException(&type, &value, &tb); - } else { - if (value) { - PyErr_SetString(PyExc_TypeError, - "instance exception may not have a separate value"); - goto raise_error; - } - value = type; - type = (PyObject*) Py_TYPE(type); - Py_INCREF(type); - if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) { - PyErr_SetString(PyExc_TypeError, - "raise: exception class must be a subclass of BaseException"); - goto raise_error; - } - } - __Pyx_PyThreadState_assign - __Pyx_ErrRestore(type, value, tb); - return; -raise_error: - Py_XDECREF(value); - Py_XDECREF(type); - Py_XDECREF(tb); - return; -} -#else -static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) { - PyObject* owned_instance = NULL; - if (tb == Py_None) { - tb = 0; - } else if (tb && !PyTraceBack_Check(tb)) { - PyErr_SetString(PyExc_TypeError, - "raise: arg 3 must be a traceback or None"); - goto bad; - } - if (value == Py_None) - value = 0; - if (PyExceptionInstance_Check(type)) { - if (value) { - PyErr_SetString(PyExc_TypeError, - "instance exception may not have a separate value"); - goto bad; - } - value = type; - type = (PyObject*) Py_TYPE(value); - } else if (PyExceptionClass_Check(type)) { - PyObject *instance_class = NULL; - if (value && PyExceptionInstance_Check(value)) { - instance_class = (PyObject*) Py_TYPE(value); - if (instance_class != type) { - int is_subclass = PyObject_IsSubclass(instance_class, type); - if (!is_subclass) { - instance_class = NULL; - } else if (unlikely(is_subclass == -1)) { - goto bad; - } else { - type = instance_class; - } - } - } - if (!instance_class) { - PyObject *args; - if (!value) - args = PyTuple_New(0); - else if (PyTuple_Check(value)) { - Py_INCREF(value); - args = value; - } else - args = PyTuple_Pack(1, value); - if (!args) - goto bad; - owned_instance = PyObject_Call(type, args, NULL); - Py_DECREF(args); - if (!owned_instance) - goto bad; - value = owned_instance; - if (!PyExceptionInstance_Check(value)) { - PyErr_Format(PyExc_TypeError, - "calling %R should have returned an instance of " - "BaseException, not %R", - type, Py_TYPE(value)); - goto bad; - } - } - } else { - PyErr_SetString(PyExc_TypeError, - "raise: exception class must be a subclass of BaseException"); - goto bad; - } - if (cause) { - PyObject *fixed_cause; - if (cause == Py_None) { - fixed_cause = NULL; - } else if (PyExceptionClass_Check(cause)) { - fixed_cause = PyObject_CallObject(cause, NULL); - if (fixed_cause == NULL) - goto bad; - } else if (PyExceptionInstance_Check(cause)) { - fixed_cause = cause; - Py_INCREF(fixed_cause); - } else { - PyErr_SetString(PyExc_TypeError, - "exception causes must derive from " - "BaseException"); - goto bad; - } - PyException_SetCause(value, fixed_cause); - } - PyErr_SetObject(type, value); - if (tb) { - #if PY_VERSION_HEX >= 0x030C00A6 - PyException_SetTraceback(value, tb); - #elif CYTHON_FAST_THREAD_STATE - PyThreadState *tstate = __Pyx_PyThreadState_Current; - PyObject* tmp_tb = tstate->curexc_traceback; - if (tb != tmp_tb) { - Py_INCREF(tb); - tstate->curexc_traceback = tb; - Py_XDECREF(tmp_tb); - } -#else - PyObject *tmp_type, *tmp_value, *tmp_tb; - PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb); - Py_INCREF(tb); - PyErr_Restore(tmp_type, tmp_value, tb); - Py_XDECREF(tmp_tb); -#endif - } -bad: - Py_XDECREF(owned_instance); - return; -} -#endif - -/* PyFunctionFastCall */ -#if CYTHON_FAST_PYCALL && !CYTHON_VECTORCALL -static PyObject* __Pyx_PyFunction_FastCallNoKw(PyCodeObject *co, PyObject **args, Py_ssize_t na, - PyObject *globals) { - PyFrameObject *f; - PyThreadState *tstate = __Pyx_PyThreadState_Current; - PyObject **fastlocals; - Py_ssize_t i; - PyObject *result; - assert(globals != NULL); - /* XXX Perhaps we should create a specialized - PyFrame_New() that doesn't take locals, but does - take builtins without sanity checking them. - */ - assert(tstate != NULL); - f = PyFrame_New(tstate, co, globals, NULL); - if (f == NULL) { - return NULL; - } - fastlocals = __Pyx_PyFrame_GetLocalsplus(f); - for (i = 0; i < na; i++) { - Py_INCREF(*args); - fastlocals[i] = *args++; - } - result = PyEval_EvalFrameEx(f,0); - ++tstate->recursion_depth; - Py_DECREF(f); - --tstate->recursion_depth; - return result; -} -static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs) { - PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); - PyObject *globals = PyFunction_GET_GLOBALS(func); - PyObject *argdefs = PyFunction_GET_DEFAULTS(func); - PyObject *closure; -#if PY_MAJOR_VERSION >= 3 - PyObject *kwdefs; -#endif - PyObject *kwtuple, **k; - PyObject **d; - Py_ssize_t nd; - Py_ssize_t nk; - PyObject *result; - assert(kwargs == NULL || PyDict_Check(kwargs)); - nk = kwargs ? PyDict_Size(kwargs) : 0; - if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) { - return NULL; - } - if ( -#if PY_MAJOR_VERSION >= 3 - co->co_kwonlyargcount == 0 && -#endif - likely(kwargs == NULL || nk == 0) && - co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) { - if (argdefs == NULL && co->co_argcount == nargs) { - result = __Pyx_PyFunction_FastCallNoKw(co, args, nargs, globals); - goto done; - } - else if (nargs == 0 && argdefs != NULL - && co->co_argcount == Py_SIZE(argdefs)) { - /* function called with no arguments, but all parameters have - a default value: use default values as arguments .*/ - args = &PyTuple_GET_ITEM(argdefs, 0); - result =__Pyx_PyFunction_FastCallNoKw(co, args, Py_SIZE(argdefs), globals); - goto done; - } - } - if (kwargs != NULL) { - Py_ssize_t pos, i; - kwtuple = PyTuple_New(2 * nk); - if (kwtuple == NULL) { - result = NULL; - goto done; - } - k = &PyTuple_GET_ITEM(kwtuple, 0); - pos = i = 0; - while (PyDict_Next(kwargs, &pos, &k[i], &k[i+1])) { - Py_INCREF(k[i]); - Py_INCREF(k[i+1]); - i += 2; - } - nk = i / 2; - } - else { - kwtuple = NULL; - k = NULL; - } - closure = PyFunction_GET_CLOSURE(func); -#if PY_MAJOR_VERSION >= 3 - kwdefs = PyFunction_GET_KW_DEFAULTS(func); -#endif - if (argdefs != NULL) { - d = &PyTuple_GET_ITEM(argdefs, 0); - nd = Py_SIZE(argdefs); - } - else { - d = NULL; - nd = 0; - } -#if PY_MAJOR_VERSION >= 3 - result = PyEval_EvalCodeEx((PyObject*)co, globals, (PyObject *)NULL, - args, (int)nargs, - k, (int)nk, - d, (int)nd, kwdefs, closure); -#else - result = PyEval_EvalCodeEx(co, globals, (PyObject *)NULL, - args, (int)nargs, - k, (int)nk, - d, (int)nd, closure); -#endif - Py_XDECREF(kwtuple); -done: - Py_LeaveRecursiveCall(); - return result; -} -#endif - -/* PyObjectCall */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) { - PyObject *result; - ternaryfunc call = Py_TYPE(func)->tp_call; - if (unlikely(!call)) - return PyObject_Call(func, arg, kw); - if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) - return NULL; - result = (*call)(func, arg, kw); - Py_LeaveRecursiveCall(); - if (unlikely(!result) && unlikely(!PyErr_Occurred())) { - PyErr_SetString( - PyExc_SystemError, - "NULL result without error in PyObject_Call"); - } - return result; -} -#endif - -/* PyObjectCallMethO */ -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { - PyObject *self, *result; - PyCFunction cfunc; - cfunc = PyCFunction_GET_FUNCTION(func); - self = PyCFunction_GET_SELF(func); - if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) - return NULL; - result = cfunc(self, arg); - Py_LeaveRecursiveCall(); - if (unlikely(!result) && unlikely(!PyErr_Occurred())) { - PyErr_SetString( - PyExc_SystemError, - "NULL result without error in PyObject_Call"); - } - return result; -} -#endif - -/* PyObjectFastCall */ -static PyObject* __Pyx_PyObject_FastCall_fallback(PyObject *func, PyObject **args, size_t nargs, PyObject *kwargs) { - PyObject *argstuple; - PyObject *result = 0; - size_t i; - argstuple = PyTuple_New((Py_ssize_t)nargs); - if (unlikely(!argstuple)) return NULL; - for (i = 0; i < nargs; i++) { - Py_INCREF(args[i]); - if (__Pyx_PyTuple_SET_ITEM(argstuple, (Py_ssize_t)i, args[i]) < 0) goto bad; - } - result = __Pyx_PyObject_Call(func, argstuple, kwargs); - bad: - Py_DECREF(argstuple); - return result; -} -static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, PyObject **args, size_t _nargs, PyObject *kwargs) { - Py_ssize_t nargs = __Pyx_PyVectorcall_NARGS(_nargs); -#if CYTHON_COMPILING_IN_CPYTHON - if (nargs == 0 && kwargs == NULL) { -#if defined(__Pyx_CyFunction_USED) && defined(NDEBUG) - if (__Pyx_IsCyOrPyCFunction(func)) -#else - if (PyCFunction_Check(func)) -#endif - { - if (likely(PyCFunction_GET_FLAGS(func) & METH_NOARGS)) { - return __Pyx_PyObject_CallMethO(func, NULL); - } - } - } - else if (nargs == 1 && kwargs == NULL) { - if (PyCFunction_Check(func)) - { - if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) { - return __Pyx_PyObject_CallMethO(func, args[0]); - } - } - } -#endif - #if PY_VERSION_HEX < 0x030800B1 - #if CYTHON_FAST_PYCCALL - if (PyCFunction_Check(func)) { - if (kwargs) { - return _PyCFunction_FastCallDict(func, args, nargs, kwargs); - } else { - return _PyCFunction_FastCallKeywords(func, args, nargs, NULL); - } - } - #if PY_VERSION_HEX >= 0x030700A1 - if (!kwargs && __Pyx_IS_TYPE(func, &PyMethodDescr_Type)) { - return _PyMethodDescr_FastCallKeywords(func, args, nargs, NULL); - } - #endif - #endif - #if CYTHON_FAST_PYCALL - if (PyFunction_Check(func)) { - return __Pyx_PyFunction_FastCallDict(func, args, nargs, kwargs); - } - #endif - #endif - #if CYTHON_VECTORCALL - #if Py_VERSION_HEX < 0x03090000 - vectorcallfunc f = _PyVectorcall_Function(func); - #else - vectorcallfunc f = PyVectorcall_Function(func); - #endif - if (f) { - return f(func, args, (size_t)nargs, kwargs); - } - #elif defined(__Pyx_CyFunction_USED) && CYTHON_BACKPORT_VECTORCALL - if (__Pyx_CyFunction_CheckExact(func)) { - __pyx_vectorcallfunc f = __Pyx_CyFunction_func_vectorcall(func); - if (f) return f(func, args, (size_t)nargs, kwargs); - } - #endif - if (nargs == 0) { - return __Pyx_PyObject_Call(func, __pyx_empty_tuple, kwargs); - } - return __Pyx_PyObject_FastCall_fallback(func, args, (size_t)nargs, kwargs); -} - -/* RaiseUnexpectedTypeError */ -static int -__Pyx_RaiseUnexpectedTypeError(const char *expected, PyObject *obj) -{ - __Pyx_TypeName obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); - PyErr_Format(PyExc_TypeError, "Expected %s, got " __Pyx_FMT_TYPENAME, - expected, obj_type_name); - __Pyx_DECREF_TypeName(obj_type_name); - return 0; -} - -/* CIntToDigits */ -static const char DIGIT_PAIRS_10[2*10*10+1] = { - "00010203040506070809" - "10111213141516171819" - "20212223242526272829" - "30313233343536373839" - "40414243444546474849" - "50515253545556575859" - "60616263646566676869" - "70717273747576777879" - "80818283848586878889" - "90919293949596979899" -}; -static const char DIGIT_PAIRS_8[2*8*8+1] = { - "0001020304050607" - "1011121314151617" - "2021222324252627" - "3031323334353637" - "4041424344454647" - "5051525354555657" - "6061626364656667" - "7071727374757677" -}; -static const char DIGITS_HEX[2*16+1] = { - "0123456789abcdef" - "0123456789ABCDEF" -}; - -/* BuildPyUnicode */ -static PyObject* __Pyx_PyUnicode_BuildFromAscii(Py_ssize_t ulength, char* chars, int clength, - int prepend_sign, char padding_char) { - PyObject *uval; - Py_ssize_t uoffset = ulength - clength; -#if CYTHON_USE_UNICODE_INTERNALS - Py_ssize_t i; -#if CYTHON_PEP393_ENABLED - void *udata; - uval = PyUnicode_New(ulength, 127); - if (unlikely(!uval)) return NULL; - udata = PyUnicode_DATA(uval); -#else - Py_UNICODE *udata; - uval = PyUnicode_FromUnicode(NULL, ulength); - if (unlikely(!uval)) return NULL; - udata = PyUnicode_AS_UNICODE(uval); -#endif - if (uoffset > 0) { - i = 0; - if (prepend_sign) { - __Pyx_PyUnicode_WRITE(PyUnicode_1BYTE_KIND, udata, 0, '-'); - i++; - } - for (; i < uoffset; i++) { - __Pyx_PyUnicode_WRITE(PyUnicode_1BYTE_KIND, udata, i, padding_char); - } - } - for (i=0; i < clength; i++) { - __Pyx_PyUnicode_WRITE(PyUnicode_1BYTE_KIND, udata, uoffset+i, chars[i]); - } -#else - { - PyObject *sign = NULL, *padding = NULL; - uval = NULL; - if (uoffset > 0) { - prepend_sign = !!prepend_sign; - if (uoffset > prepend_sign) { - padding = PyUnicode_FromOrdinal(padding_char); - if (likely(padding) && uoffset > prepend_sign + 1) { - PyObject *tmp; - PyObject *repeat = PyInt_FromSsize_t(uoffset - prepend_sign); - if (unlikely(!repeat)) goto done_or_error; - tmp = PyNumber_Multiply(padding, repeat); - Py_DECREF(repeat); - Py_DECREF(padding); - padding = tmp; - } - if (unlikely(!padding)) goto done_or_error; - } - if (prepend_sign) { - sign = PyUnicode_FromOrdinal('-'); - if (unlikely(!sign)) goto done_or_error; - } - } - uval = PyUnicode_DecodeASCII(chars, clength, NULL); - if (likely(uval) && padding) { - PyObject *tmp = PyNumber_Add(padding, uval); - Py_DECREF(uval); - uval = tmp; - } - if (likely(uval) && sign) { - PyObject *tmp = PyNumber_Add(sign, uval); - Py_DECREF(uval); - uval = tmp; - } -done_or_error: - Py_XDECREF(padding); - Py_XDECREF(sign); - } -#endif - return uval; -} - -/* CIntToPyUnicode */ -static CYTHON_INLINE PyObject* __Pyx_PyUnicode_From_int(int value, Py_ssize_t width, char padding_char, char format_char) { - char digits[sizeof(int)*3+2]; - char *dpos, *end = digits + sizeof(int)*3+2; - const char *hex_digits = DIGITS_HEX; - Py_ssize_t length, ulength; - int prepend_sign, last_one_off; - int remaining; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const int neg_one = (int) -1, const_zero = (int) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; - if (format_char == 'X') { - hex_digits += 16; - format_char = 'x'; - } - remaining = value; - last_one_off = 0; - dpos = end; - do { - int digit_pos; - switch (format_char) { - case 'o': - digit_pos = abs((int)(remaining % (8*8))); - remaining = (int) (remaining / (8*8)); - dpos -= 2; - memcpy(dpos, DIGIT_PAIRS_8 + digit_pos * 2, 2); - last_one_off = (digit_pos < 8); - break; - case 'd': - digit_pos = abs((int)(remaining % (10*10))); - remaining = (int) (remaining / (10*10)); - dpos -= 2; - memcpy(dpos, DIGIT_PAIRS_10 + digit_pos * 2, 2); - last_one_off = (digit_pos < 10); - break; - case 'x': - *(--dpos) = hex_digits[abs((int)(remaining % 16))]; - remaining = (int) (remaining / 16); - break; - default: - assert(0); - break; - } - } while (unlikely(remaining != 0)); - assert(!last_one_off || *dpos == '0'); - dpos += last_one_off; - length = end - dpos; - ulength = length; - prepend_sign = 0; - if (!is_unsigned && value <= neg_one) { - if (padding_char == ' ' || width <= length + 1) { - *(--dpos) = '-'; - ++length; - } else { - prepend_sign = 1; - } - ++ulength; - } - if (width > ulength) { - ulength = width; - } - if (ulength == 1) { - return PyUnicode_FromOrdinal(*dpos); - } - return __Pyx_PyUnicode_BuildFromAscii(ulength, dpos, (int) length, prepend_sign, padding_char); -} - -/* CIntToPyUnicode */ -static CYTHON_INLINE PyObject* __Pyx_PyUnicode_From_Py_ssize_t(Py_ssize_t value, Py_ssize_t width, char padding_char, char format_char) { - char digits[sizeof(Py_ssize_t)*3+2]; - char *dpos, *end = digits + sizeof(Py_ssize_t)*3+2; - const char *hex_digits = DIGITS_HEX; - Py_ssize_t length, ulength; - int prepend_sign, last_one_off; - Py_ssize_t remaining; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const Py_ssize_t neg_one = (Py_ssize_t) -1, const_zero = (Py_ssize_t) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; - if (format_char == 'X') { - hex_digits += 16; - format_char = 'x'; - } - remaining = value; - last_one_off = 0; - dpos = end; - do { - int digit_pos; - switch (format_char) { - case 'o': - digit_pos = abs((int)(remaining % (8*8))); - remaining = (Py_ssize_t) (remaining / (8*8)); - dpos -= 2; - memcpy(dpos, DIGIT_PAIRS_8 + digit_pos * 2, 2); - last_one_off = (digit_pos < 8); - break; - case 'd': - digit_pos = abs((int)(remaining % (10*10))); - remaining = (Py_ssize_t) (remaining / (10*10)); - dpos -= 2; - memcpy(dpos, DIGIT_PAIRS_10 + digit_pos * 2, 2); - last_one_off = (digit_pos < 10); - break; - case 'x': - *(--dpos) = hex_digits[abs((int)(remaining % 16))]; - remaining = (Py_ssize_t) (remaining / 16); - break; - default: - assert(0); - break; - } - } while (unlikely(remaining != 0)); - assert(!last_one_off || *dpos == '0'); - dpos += last_one_off; - length = end - dpos; - ulength = length; - prepend_sign = 0; - if (!is_unsigned && value <= neg_one) { - if (padding_char == ' ' || width <= length + 1) { - *(--dpos) = '-'; - ++length; - } else { - prepend_sign = 1; - } - ++ulength; - } - if (width > ulength) { - ulength = width; - } - if (ulength == 1) { - return PyUnicode_FromOrdinal(*dpos); - } - return __Pyx_PyUnicode_BuildFromAscii(ulength, dpos, (int) length, prepend_sign, padding_char); -} - -/* JoinPyUnicode */ -static PyObject* __Pyx_PyUnicode_Join(PyObject* value_tuple, Py_ssize_t value_count, Py_ssize_t result_ulength, - Py_UCS4 max_char) { -#if CYTHON_USE_UNICODE_INTERNALS && CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - PyObject *result_uval; - int result_ukind, kind_shift; - Py_ssize_t i, char_pos; - void *result_udata; - CYTHON_MAYBE_UNUSED_VAR(max_char); -#if CYTHON_PEP393_ENABLED - result_uval = PyUnicode_New(result_ulength, max_char); - if (unlikely(!result_uval)) return NULL; - result_ukind = (max_char <= 255) ? PyUnicode_1BYTE_KIND : (max_char <= 65535) ? PyUnicode_2BYTE_KIND : PyUnicode_4BYTE_KIND; - kind_shift = (result_ukind == PyUnicode_4BYTE_KIND) ? 2 : result_ukind - 1; - result_udata = PyUnicode_DATA(result_uval); -#else - result_uval = PyUnicode_FromUnicode(NULL, result_ulength); - if (unlikely(!result_uval)) return NULL; - result_ukind = sizeof(Py_UNICODE); - kind_shift = (result_ukind == 4) ? 2 : result_ukind - 1; - result_udata = PyUnicode_AS_UNICODE(result_uval); -#endif - assert(kind_shift == 2 || kind_shift == 1 || kind_shift == 0); - char_pos = 0; - for (i=0; i < value_count; i++) { - int ukind; - Py_ssize_t ulength; - void *udata; - PyObject *uval = PyTuple_GET_ITEM(value_tuple, i); - if (unlikely(__Pyx_PyUnicode_READY(uval))) - goto bad; - ulength = __Pyx_PyUnicode_GET_LENGTH(uval); - if (unlikely(!ulength)) - continue; - if (unlikely((PY_SSIZE_T_MAX >> kind_shift) - ulength < char_pos)) - goto overflow; - ukind = __Pyx_PyUnicode_KIND(uval); - udata = __Pyx_PyUnicode_DATA(uval); - if (!CYTHON_PEP393_ENABLED || ukind == result_ukind) { - memcpy((char *)result_udata + (char_pos << kind_shift), udata, (size_t) (ulength << kind_shift)); - } else { - #if PY_VERSION_HEX >= 0x030D0000 - if (unlikely(PyUnicode_CopyCharacters(result_uval, char_pos, uval, 0, ulength) < 0)) goto bad; - #elif CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030300F0 || defined(_PyUnicode_FastCopyCharacters) - _PyUnicode_FastCopyCharacters(result_uval, char_pos, uval, 0, ulength); - #else - Py_ssize_t j; - for (j=0; j < ulength; j++) { - Py_UCS4 uchar = __Pyx_PyUnicode_READ(ukind, udata, j); - __Pyx_PyUnicode_WRITE(result_ukind, result_udata, char_pos+j, uchar); - } - #endif - } - char_pos += ulength; - } - return result_uval; -overflow: - PyErr_SetString(PyExc_OverflowError, "join() result is too long for a Python string"); -bad: - Py_DECREF(result_uval); - return NULL; -#else - CYTHON_UNUSED_VAR(max_char); - CYTHON_UNUSED_VAR(result_ulength); - CYTHON_UNUSED_VAR(value_count); - return PyUnicode_Join(__pyx_empty_unicode, value_tuple); -#endif -} - -/* GetAttr */ -static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *o, PyObject *n) { -#if CYTHON_USE_TYPE_SLOTS -#if PY_MAJOR_VERSION >= 3 - if (likely(PyUnicode_Check(n))) -#else - if (likely(PyString_Check(n))) -#endif - return __Pyx_PyObject_GetAttrStr(o, n); -#endif - return PyObject_GetAttr(o, n); -} - -/* GetItemInt */ -static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) { - PyObject *r; - if (unlikely(!j)) return NULL; - r = PyObject_GetItem(o, j); - Py_DECREF(j); - return r; -} -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, - CYTHON_NCP_UNUSED int wraparound, - CYTHON_NCP_UNUSED int boundscheck) { -#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - Py_ssize_t wrapped_i = i; - if (wraparound & unlikely(i < 0)) { - wrapped_i += PyList_GET_SIZE(o); - } - if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, PyList_GET_SIZE(o)))) { - PyObject *r = PyList_GET_ITEM(o, wrapped_i); - Py_INCREF(r); - return r; - } - return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); -#else - return PySequence_GetItem(o, i); -#endif -} -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, - CYTHON_NCP_UNUSED int wraparound, - CYTHON_NCP_UNUSED int boundscheck) { -#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - Py_ssize_t wrapped_i = i; - if (wraparound & unlikely(i < 0)) { - wrapped_i += PyTuple_GET_SIZE(o); - } - if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, PyTuple_GET_SIZE(o)))) { - PyObject *r = PyTuple_GET_ITEM(o, wrapped_i); - Py_INCREF(r); - return r; - } - return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); -#else - return PySequence_GetItem(o, i); -#endif -} -static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, int is_list, - CYTHON_NCP_UNUSED int wraparound, - CYTHON_NCP_UNUSED int boundscheck) { -#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS - if (is_list || PyList_CheckExact(o)) { - Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o); - if ((!boundscheck) || (likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o))))) { - PyObject *r = PyList_GET_ITEM(o, n); - Py_INCREF(r); - return r; - } - } - else if (PyTuple_CheckExact(o)) { - Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o); - if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyTuple_GET_SIZE(o)))) { - PyObject *r = PyTuple_GET_ITEM(o, n); - Py_INCREF(r); - return r; - } - } else { - PyMappingMethods *mm = Py_TYPE(o)->tp_as_mapping; - PySequenceMethods *sm = Py_TYPE(o)->tp_as_sequence; - if (mm && mm->mp_subscript) { - PyObject *r, *key = PyInt_FromSsize_t(i); - if (unlikely(!key)) return NULL; - r = mm->mp_subscript(o, key); - Py_DECREF(key); - return r; - } - if (likely(sm && sm->sq_item)) { - if (wraparound && unlikely(i < 0) && likely(sm->sq_length)) { - Py_ssize_t l = sm->sq_length(o); - if (likely(l >= 0)) { - i += l; - } else { - if (!PyErr_ExceptionMatches(PyExc_OverflowError)) - return NULL; - PyErr_Clear(); - } - } - return sm->sq_item(o, i); - } - } -#else - if (is_list || PySequence_Check(o)) { - return PySequence_GetItem(o, i); - } -#endif - return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); -} - -/* PyObjectCallOneArg */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { - PyObject *args[2] = {NULL, arg}; - return __Pyx_PyObject_FastCall(func, args+1, 1 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); -} - -/* ObjectGetItem */ -#if CYTHON_USE_TYPE_SLOTS -static PyObject *__Pyx_PyObject_GetIndex(PyObject *obj, PyObject *index) { - PyObject *runerr = NULL; - Py_ssize_t key_value; - key_value = __Pyx_PyIndex_AsSsize_t(index); - if (likely(key_value != -1 || !(runerr = PyErr_Occurred()))) { - return __Pyx_GetItemInt_Fast(obj, key_value, 0, 1, 1); - } - if (PyErr_GivenExceptionMatches(runerr, PyExc_OverflowError)) { - __Pyx_TypeName index_type_name = __Pyx_PyType_GetName(Py_TYPE(index)); - PyErr_Clear(); - PyErr_Format(PyExc_IndexError, - "cannot fit '" __Pyx_FMT_TYPENAME "' into an index-sized integer", index_type_name); - __Pyx_DECREF_TypeName(index_type_name); - } - return NULL; -} -static PyObject *__Pyx_PyObject_GetItem_Slow(PyObject *obj, PyObject *key) { - __Pyx_TypeName obj_type_name; - if (likely(PyType_Check(obj))) { - PyObject *meth = __Pyx_PyObject_GetAttrStrNoError(obj, __pyx_n_s_class_getitem); - if (meth) { - PyObject *result = __Pyx_PyObject_CallOneArg(meth, key); - Py_DECREF(meth); - return result; - } - } - obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); - PyErr_Format(PyExc_TypeError, - "'" __Pyx_FMT_TYPENAME "' object is not subscriptable", obj_type_name); - __Pyx_DECREF_TypeName(obj_type_name); - return NULL; -} -static PyObject *__Pyx_PyObject_GetItem(PyObject *obj, PyObject *key) { - PyTypeObject *tp = Py_TYPE(obj); - PyMappingMethods *mm = tp->tp_as_mapping; - PySequenceMethods *sm = tp->tp_as_sequence; - if (likely(mm && mm->mp_subscript)) { - return mm->mp_subscript(obj, key); - } - if (likely(sm && sm->sq_item)) { - return __Pyx_PyObject_GetIndex(obj, key); - } - return __Pyx_PyObject_GetItem_Slow(obj, key); -} -#endif - -/* KeywordStringCheck */ -static int __Pyx_CheckKeywordStrings( - PyObject *kw, - const char* function_name, - int kw_allowed) -{ - PyObject* key = 0; - Py_ssize_t pos = 0; -#if CYTHON_COMPILING_IN_PYPY - if (!kw_allowed && PyDict_Next(kw, &pos, &key, 0)) - goto invalid_keyword; - return 1; -#else - if (CYTHON_METH_FASTCALL && likely(PyTuple_Check(kw))) { - Py_ssize_t kwsize; -#if CYTHON_ASSUME_SAFE_MACROS - kwsize = PyTuple_GET_SIZE(kw); -#else - kwsize = PyTuple_Size(kw); - if (kwsize < 0) return 0; -#endif - if (unlikely(kwsize == 0)) - return 1; - if (!kw_allowed) { -#if CYTHON_ASSUME_SAFE_MACROS - key = PyTuple_GET_ITEM(kw, 0); -#else - key = PyTuple_GetItem(kw, pos); - if (!key) return 0; -#endif - goto invalid_keyword; - } -#if PY_VERSION_HEX < 0x03090000 - for (pos = 0; pos < kwsize; pos++) { -#if CYTHON_ASSUME_SAFE_MACROS - key = PyTuple_GET_ITEM(kw, pos); -#else - key = PyTuple_GetItem(kw, pos); - if (!key) return 0; -#endif - if (unlikely(!PyUnicode_Check(key))) - goto invalid_keyword_type; - } -#endif - return 1; - } - while (PyDict_Next(kw, &pos, &key, 0)) { - #if PY_MAJOR_VERSION < 3 - if (unlikely(!PyString_Check(key))) - #endif - if (unlikely(!PyUnicode_Check(key))) - goto invalid_keyword_type; - } - if (!kw_allowed && unlikely(key)) - goto invalid_keyword; - return 1; -invalid_keyword_type: - PyErr_Format(PyExc_TypeError, - "%.200s() keywords must be strings", function_name); - return 0; -#endif -invalid_keyword: - #if PY_MAJOR_VERSION < 3 - PyErr_Format(PyExc_TypeError, - "%.200s() got an unexpected keyword argument '%.200s'", - function_name, PyString_AsString(key)); - #else - PyErr_Format(PyExc_TypeError, - "%s() got an unexpected keyword argument '%U'", - function_name, key); - #endif - return 0; -} - -/* DivInt[Py_ssize_t] */ -static CYTHON_INLINE Py_ssize_t __Pyx_div_Py_ssize_t(Py_ssize_t a, Py_ssize_t b) { - Py_ssize_t q = a / b; - Py_ssize_t r = a - q*b; - q -= ((r != 0) & ((r ^ b) < 0)); - return q; -} - -/* GetAttr3 */ -static PyObject *__Pyx_GetAttr3Default(PyObject *d) { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - if (unlikely(!__Pyx_PyErr_ExceptionMatches(PyExc_AttributeError))) - return NULL; - __Pyx_PyErr_Clear(); - Py_INCREF(d); - return d; -} -static CYTHON_INLINE PyObject *__Pyx_GetAttr3(PyObject *o, PyObject *n, PyObject *d) { - PyObject *r; -#if CYTHON_USE_TYPE_SLOTS - if (likely(PyString_Check(n))) { - r = __Pyx_PyObject_GetAttrStrNoError(o, n); - if (unlikely(!r) && likely(!PyErr_Occurred())) { - r = __Pyx_NewRef(d); - } - return r; - } -#endif - r = PyObject_GetAttr(o, n); - return (likely(r)) ? r : __Pyx_GetAttr3Default(d); -} - -/* PyDictVersioning */ -#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS -static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj) { - PyObject *dict = Py_TYPE(obj)->tp_dict; - return likely(dict) ? __PYX_GET_DICT_VERSION(dict) : 0; -} -static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj) { - PyObject **dictptr = NULL; - Py_ssize_t offset = Py_TYPE(obj)->tp_dictoffset; - if (offset) { -#if CYTHON_COMPILING_IN_CPYTHON - dictptr = (likely(offset > 0)) ? (PyObject **) ((char *)obj + offset) : _PyObject_GetDictPtr(obj); -#else - dictptr = _PyObject_GetDictPtr(obj); -#endif - } - return (dictptr && *dictptr) ? __PYX_GET_DICT_VERSION(*dictptr) : 0; -} -static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version) { - PyObject *dict = Py_TYPE(obj)->tp_dict; - if (unlikely(!dict) || unlikely(tp_dict_version != __PYX_GET_DICT_VERSION(dict))) - return 0; - return obj_dict_version == __Pyx_get_object_dict_version(obj); -} -#endif - -/* GetModuleGlobalName */ -#if CYTHON_USE_DICT_VERSIONS -static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value) -#else -static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name) -#endif -{ - PyObject *result; -#if !CYTHON_AVOID_BORROWED_REFS -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1 - result = _PyDict_GetItem_KnownHash(__pyx_d, name, ((PyASCIIObject *) name)->hash); - __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) - if (likely(result)) { - return __Pyx_NewRef(result); - } else if (unlikely(PyErr_Occurred())) { - return NULL; - } -#elif CYTHON_COMPILING_IN_LIMITED_API - if (unlikely(!__pyx_m)) { - return NULL; - } - result = PyObject_GetAttr(__pyx_m, name); - if (likely(result)) { - return result; - } -#else - result = PyDict_GetItem(__pyx_d, name); - __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) - if (likely(result)) { - return __Pyx_NewRef(result); - } -#endif -#else - result = PyObject_GetItem(__pyx_d, name); - __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) - if (likely(result)) { - return __Pyx_NewRef(result); - } - PyErr_Clear(); -#endif - return __Pyx_GetBuiltinName(name); -} - -/* RaiseTooManyValuesToUnpack */ -static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) { - PyErr_Format(PyExc_ValueError, - "too many values to unpack (expected %" CYTHON_FORMAT_SSIZE_T "d)", expected); -} - -/* RaiseNeedMoreValuesToUnpack */ -static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) { - PyErr_Format(PyExc_ValueError, - "need more than %" CYTHON_FORMAT_SSIZE_T "d value%.1s to unpack", - index, (index == 1) ? "" : "s"); -} - -/* RaiseNoneIterError */ -static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void) { - PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); -} - -/* ExtTypeTest */ -static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type) { - __Pyx_TypeName obj_type_name; - __Pyx_TypeName type_name; - if (unlikely(!type)) { - PyErr_SetString(PyExc_SystemError, "Missing type object"); - return 0; - } - if (likely(__Pyx_TypeCheck(obj, type))) - return 1; - obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); - type_name = __Pyx_PyType_GetName(type); - PyErr_Format(PyExc_TypeError, - "Cannot convert " __Pyx_FMT_TYPENAME " to " __Pyx_FMT_TYPENAME, - obj_type_name, type_name); - __Pyx_DECREF_TypeName(obj_type_name); - __Pyx_DECREF_TypeName(type_name); - return 0; -} - -/* GetTopmostException */ -#if CYTHON_USE_EXC_INFO_STACK && CYTHON_FAST_THREAD_STATE -static _PyErr_StackItem * -__Pyx_PyErr_GetTopmostException(PyThreadState *tstate) -{ - _PyErr_StackItem *exc_info = tstate->exc_info; - while ((exc_info->exc_value == NULL || exc_info->exc_value == Py_None) && - exc_info->previous_item != NULL) - { - exc_info = exc_info->previous_item; - } - return exc_info; -} -#endif - -/* SaveResetException */ -#if CYTHON_FAST_THREAD_STATE -static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { - #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 - _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); - PyObject *exc_value = exc_info->exc_value; - if (exc_value == NULL || exc_value == Py_None) { - *value = NULL; - *type = NULL; - *tb = NULL; - } else { - *value = exc_value; - Py_INCREF(*value); - *type = (PyObject*) Py_TYPE(exc_value); - Py_INCREF(*type); - *tb = PyException_GetTraceback(exc_value); - } - #elif CYTHON_USE_EXC_INFO_STACK - _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); - *type = exc_info->exc_type; - *value = exc_info->exc_value; - *tb = exc_info->exc_traceback; - Py_XINCREF(*type); - Py_XINCREF(*value); - Py_XINCREF(*tb); - #else - *type = tstate->exc_type; - *value = tstate->exc_value; - *tb = tstate->exc_traceback; - Py_XINCREF(*type); - Py_XINCREF(*value); - Py_XINCREF(*tb); - #endif -} -static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { - #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 - _PyErr_StackItem *exc_info = tstate->exc_info; - PyObject *tmp_value = exc_info->exc_value; - exc_info->exc_value = value; - Py_XDECREF(tmp_value); - Py_XDECREF(type); - Py_XDECREF(tb); - #else - PyObject *tmp_type, *tmp_value, *tmp_tb; - #if CYTHON_USE_EXC_INFO_STACK - _PyErr_StackItem *exc_info = tstate->exc_info; - tmp_type = exc_info->exc_type; - tmp_value = exc_info->exc_value; - tmp_tb = exc_info->exc_traceback; - exc_info->exc_type = type; - exc_info->exc_value = value; - exc_info->exc_traceback = tb; - #else - tmp_type = tstate->exc_type; - tmp_value = tstate->exc_value; - tmp_tb = tstate->exc_traceback; - tstate->exc_type = type; - tstate->exc_value = value; - tstate->exc_traceback = tb; - #endif - Py_XDECREF(tmp_type); - Py_XDECREF(tmp_value); - Py_XDECREF(tmp_tb); - #endif -} -#endif - -/* GetException */ -#if CYTHON_FAST_THREAD_STATE -static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) -#else -static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) -#endif -{ - PyObject *local_type = NULL, *local_value, *local_tb = NULL; -#if CYTHON_FAST_THREAD_STATE - PyObject *tmp_type, *tmp_value, *tmp_tb; - #if PY_VERSION_HEX >= 0x030C00A6 - local_value = tstate->current_exception; - tstate->current_exception = 0; - if (likely(local_value)) { - local_type = (PyObject*) Py_TYPE(local_value); - Py_INCREF(local_type); - local_tb = PyException_GetTraceback(local_value); - } - #else - local_type = tstate->curexc_type; - local_value = tstate->curexc_value; - local_tb = tstate->curexc_traceback; - tstate->curexc_type = 0; - tstate->curexc_value = 0; - tstate->curexc_traceback = 0; - #endif -#else - PyErr_Fetch(&local_type, &local_value, &local_tb); -#endif - PyErr_NormalizeException(&local_type, &local_value, &local_tb); -#if CYTHON_FAST_THREAD_STATE && PY_VERSION_HEX >= 0x030C00A6 - if (unlikely(tstate->current_exception)) -#elif CYTHON_FAST_THREAD_STATE - if (unlikely(tstate->curexc_type)) -#else - if (unlikely(PyErr_Occurred())) -#endif - goto bad; - #if PY_MAJOR_VERSION >= 3 - if (local_tb) { - if (unlikely(PyException_SetTraceback(local_value, local_tb) < 0)) - goto bad; - } - #endif - Py_XINCREF(local_tb); - Py_XINCREF(local_type); - Py_XINCREF(local_value); - *type = local_type; - *value = local_value; - *tb = local_tb; -#if CYTHON_FAST_THREAD_STATE - #if CYTHON_USE_EXC_INFO_STACK - { - _PyErr_StackItem *exc_info = tstate->exc_info; - #if PY_VERSION_HEX >= 0x030B00a4 - tmp_value = exc_info->exc_value; - exc_info->exc_value = local_value; - tmp_type = NULL; - tmp_tb = NULL; - Py_XDECREF(local_type); - Py_XDECREF(local_tb); - #else - tmp_type = exc_info->exc_type; - tmp_value = exc_info->exc_value; - tmp_tb = exc_info->exc_traceback; - exc_info->exc_type = local_type; - exc_info->exc_value = local_value; - exc_info->exc_traceback = local_tb; - #endif - } - #else - tmp_type = tstate->exc_type; - tmp_value = tstate->exc_value; - tmp_tb = tstate->exc_traceback; - tstate->exc_type = local_type; - tstate->exc_value = local_value; - tstate->exc_traceback = local_tb; - #endif - Py_XDECREF(tmp_type); - Py_XDECREF(tmp_value); - Py_XDECREF(tmp_tb); -#else - PyErr_SetExcInfo(local_type, local_value, local_tb); -#endif - return 0; -bad: - *type = 0; - *value = 0; - *tb = 0; - Py_XDECREF(local_type); - Py_XDECREF(local_value); - Py_XDECREF(local_tb); - return -1; -} - -/* SwapException */ -#if CYTHON_FAST_THREAD_STATE -static CYTHON_INLINE void __Pyx__ExceptionSwap(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { - PyObject *tmp_type, *tmp_value, *tmp_tb; - #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 - _PyErr_StackItem *exc_info = tstate->exc_info; - tmp_value = exc_info->exc_value; - exc_info->exc_value = *value; - if (tmp_value == NULL || tmp_value == Py_None) { - Py_XDECREF(tmp_value); - tmp_value = NULL; - tmp_type = NULL; - tmp_tb = NULL; - } else { - tmp_type = (PyObject*) Py_TYPE(tmp_value); - Py_INCREF(tmp_type); - #if CYTHON_COMPILING_IN_CPYTHON - tmp_tb = ((PyBaseExceptionObject*) tmp_value)->traceback; - Py_XINCREF(tmp_tb); - #else - tmp_tb = PyException_GetTraceback(tmp_value); - #endif - } - #elif CYTHON_USE_EXC_INFO_STACK - _PyErr_StackItem *exc_info = tstate->exc_info; - tmp_type = exc_info->exc_type; - tmp_value = exc_info->exc_value; - tmp_tb = exc_info->exc_traceback; - exc_info->exc_type = *type; - exc_info->exc_value = *value; - exc_info->exc_traceback = *tb; - #else - tmp_type = tstate->exc_type; - tmp_value = tstate->exc_value; - tmp_tb = tstate->exc_traceback; - tstate->exc_type = *type; - tstate->exc_value = *value; - tstate->exc_traceback = *tb; - #endif - *type = tmp_type; - *value = tmp_value; - *tb = tmp_tb; -} -#else -static CYTHON_INLINE void __Pyx_ExceptionSwap(PyObject **type, PyObject **value, PyObject **tb) { - PyObject *tmp_type, *tmp_value, *tmp_tb; - PyErr_GetExcInfo(&tmp_type, &tmp_value, &tmp_tb); - PyErr_SetExcInfo(*type, *value, *tb); - *type = tmp_type; - *value = tmp_value; - *tb = tmp_tb; -} -#endif - -/* Import */ -static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { - PyObject *module = 0; - PyObject *empty_dict = 0; - PyObject *empty_list = 0; - #if PY_MAJOR_VERSION < 3 - PyObject *py_import; - py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import); - if (unlikely(!py_import)) - goto bad; - if (!from_list) { - empty_list = PyList_New(0); - if (unlikely(!empty_list)) - goto bad; - from_list = empty_list; - } - #endif - empty_dict = PyDict_New(); - if (unlikely(!empty_dict)) - goto bad; - { - #if PY_MAJOR_VERSION >= 3 - if (level == -1) { - if ((1) && (strchr(__Pyx_MODULE_NAME, '.'))) { - module = PyImport_ImportModuleLevelObject( - name, __pyx_d, empty_dict, from_list, 1); - if (unlikely(!module)) { - if (unlikely(!PyErr_ExceptionMatches(PyExc_ImportError))) - goto bad; - PyErr_Clear(); - } - } - level = 0; - } - #endif - if (!module) { - #if PY_MAJOR_VERSION < 3 - PyObject *py_level = PyInt_FromLong(level); - if (unlikely(!py_level)) - goto bad; - module = PyObject_CallFunctionObjArgs(py_import, - name, __pyx_d, empty_dict, from_list, py_level, (PyObject *)NULL); - Py_DECREF(py_level); - #else - module = PyImport_ImportModuleLevelObject( - name, __pyx_d, empty_dict, from_list, level); - #endif - } - } -bad: - Py_XDECREF(empty_dict); - Py_XDECREF(empty_list); - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(py_import); - #endif - return module; -} - -/* ImportDottedModule */ -#if PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx__ImportDottedModule_Error(PyObject *name, PyObject *parts_tuple, Py_ssize_t count) { - PyObject *partial_name = NULL, *slice = NULL, *sep = NULL; - if (unlikely(PyErr_Occurred())) { - PyErr_Clear(); - } - if (likely(PyTuple_GET_SIZE(parts_tuple) == count)) { - partial_name = name; - } else { - slice = PySequence_GetSlice(parts_tuple, 0, count); - if (unlikely(!slice)) - goto bad; - sep = PyUnicode_FromStringAndSize(".", 1); - if (unlikely(!sep)) - goto bad; - partial_name = PyUnicode_Join(sep, slice); - } - PyErr_Format( -#if PY_MAJOR_VERSION < 3 - PyExc_ImportError, - "No module named '%s'", PyString_AS_STRING(partial_name)); -#else -#if PY_VERSION_HEX >= 0x030600B1 - PyExc_ModuleNotFoundError, -#else - PyExc_ImportError, -#endif - "No module named '%U'", partial_name); -#endif -bad: - Py_XDECREF(sep); - Py_XDECREF(slice); - Py_XDECREF(partial_name); - return NULL; -} -#endif -#if PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx__ImportDottedModule_Lookup(PyObject *name) { - PyObject *imported_module; -#if PY_VERSION_HEX < 0x030700A1 || (CYTHON_COMPILING_IN_PYPY && PYPY_VERSION_NUM < 0x07030400) - PyObject *modules = PyImport_GetModuleDict(); - if (unlikely(!modules)) - return NULL; - imported_module = __Pyx_PyDict_GetItemStr(modules, name); - Py_XINCREF(imported_module); -#else - imported_module = PyImport_GetModule(name); -#endif - return imported_module; -} -#endif -#if PY_MAJOR_VERSION >= 3 -static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple) { - Py_ssize_t i, nparts; - nparts = PyTuple_GET_SIZE(parts_tuple); - for (i=1; i < nparts && module; i++) { - PyObject *part, *submodule; -#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - part = PyTuple_GET_ITEM(parts_tuple, i); -#else - part = PySequence_ITEM(parts_tuple, i); -#endif - submodule = __Pyx_PyObject_GetAttrStrNoError(module, part); -#if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS) - Py_DECREF(part); -#endif - Py_DECREF(module); - module = submodule; - } - if (unlikely(!module)) { - return __Pyx__ImportDottedModule_Error(name, parts_tuple, i); - } - return module; -} -#endif -static PyObject *__Pyx__ImportDottedModule(PyObject *name, PyObject *parts_tuple) { -#if PY_MAJOR_VERSION < 3 - PyObject *module, *from_list, *star = __pyx_n_s__3; - CYTHON_UNUSED_VAR(parts_tuple); - from_list = PyList_New(1); - if (unlikely(!from_list)) - return NULL; - Py_INCREF(star); - PyList_SET_ITEM(from_list, 0, star); - module = __Pyx_Import(name, from_list, 0); - Py_DECREF(from_list); - return module; -#else - PyObject *imported_module; - PyObject *module = __Pyx_Import(name, NULL, 0); - if (!parts_tuple || unlikely(!module)) - return module; - imported_module = __Pyx__ImportDottedModule_Lookup(name); - if (likely(imported_module)) { - Py_DECREF(module); - return imported_module; - } - PyErr_Clear(); - return __Pyx_ImportDottedModule_WalkParts(module, name, parts_tuple); -#endif -} -static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple) { -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030400B1 - PyObject *module = __Pyx__ImportDottedModule_Lookup(name); - if (likely(module)) { - PyObject *spec = __Pyx_PyObject_GetAttrStrNoError(module, __pyx_n_s_spec); - if (likely(spec)) { - PyObject *unsafe = __Pyx_PyObject_GetAttrStrNoError(spec, __pyx_n_s_initializing); - if (likely(!unsafe || !__Pyx_PyObject_IsTrue(unsafe))) { - Py_DECREF(spec); - spec = NULL; - } - Py_XDECREF(unsafe); - } - if (likely(!spec)) { - PyErr_Clear(); - return module; - } - Py_DECREF(spec); - Py_DECREF(module); - } else if (PyErr_Occurred()) { - PyErr_Clear(); - } -#endif - return __Pyx__ImportDottedModule(name, parts_tuple); -} - -/* ssize_strlen */ -static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s) { - size_t len = strlen(s); - if (unlikely(len > PY_SSIZE_T_MAX)) { - PyErr_SetString(PyExc_OverflowError, "byte string is too long"); - return -1; - } - return (Py_ssize_t) len; -} - -/* FastTypeChecks */ -#if CYTHON_COMPILING_IN_CPYTHON -static int __Pyx_InBases(PyTypeObject *a, PyTypeObject *b) { - while (a) { - a = __Pyx_PyType_GetSlot(a, tp_base, PyTypeObject*); - if (a == b) - return 1; - } - return b == &PyBaseObject_Type; -} -static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b) { - PyObject *mro; - if (a == b) return 1; - mro = a->tp_mro; - if (likely(mro)) { - Py_ssize_t i, n; - n = PyTuple_GET_SIZE(mro); - for (i = 0; i < n; i++) { - if (PyTuple_GET_ITEM(mro, i) == (PyObject *)b) - return 1; - } - return 0; - } - return __Pyx_InBases(a, b); -} -static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b) { - PyObject *mro; - if (cls == a || cls == b) return 1; - mro = cls->tp_mro; - if (likely(mro)) { - Py_ssize_t i, n; - n = PyTuple_GET_SIZE(mro); - for (i = 0; i < n; i++) { - PyObject *base = PyTuple_GET_ITEM(mro, i); - if (base == (PyObject *)a || base == (PyObject *)b) - return 1; - } - return 0; - } - return __Pyx_InBases(cls, a) || __Pyx_InBases(cls, b); -} -#if PY_MAJOR_VERSION == 2 -static int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject* exc_type2) { - PyObject *exception, *value, *tb; - int res; - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - __Pyx_ErrFetch(&exception, &value, &tb); - res = exc_type1 ? PyObject_IsSubclass(err, exc_type1) : 0; - if (unlikely(res == -1)) { - PyErr_WriteUnraisable(err); - res = 0; - } - if (!res) { - res = PyObject_IsSubclass(err, exc_type2); - if (unlikely(res == -1)) { - PyErr_WriteUnraisable(err); - res = 0; - } - } - __Pyx_ErrRestore(exception, value, tb); - return res; -} -#else -static CYTHON_INLINE int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject *exc_type2) { - if (exc_type1) { - return __Pyx_IsAnySubtype2((PyTypeObject*)err, (PyTypeObject*)exc_type1, (PyTypeObject*)exc_type2); - } else { - return __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type2); - } -} -#endif -static int __Pyx_PyErr_GivenExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { - Py_ssize_t i, n; - assert(PyExceptionClass_Check(exc_type)); - n = PyTuple_GET_SIZE(tuple); -#if PY_MAJOR_VERSION >= 3 - for (i=0; itp_as_sequence && type->tp_as_sequence->sq_repeat)) { - return type->tp_as_sequence->sq_repeat(seq, mul); - } else -#endif - { - return __Pyx_PySequence_Multiply_Generic(seq, mul); - } -} - -/* SetItemInt */ -static int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v) { - int r; - if (unlikely(!j)) return -1; - r = PyObject_SetItem(o, j, v); - Py_DECREF(j); - return r; -} -static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v, int is_list, - CYTHON_NCP_UNUSED int wraparound, CYTHON_NCP_UNUSED int boundscheck) { -#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS - if (is_list || PyList_CheckExact(o)) { - Py_ssize_t n = (!wraparound) ? i : ((likely(i >= 0)) ? i : i + PyList_GET_SIZE(o)); - if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o)))) { - PyObject* old = PyList_GET_ITEM(o, n); - Py_INCREF(v); - PyList_SET_ITEM(o, n, v); - Py_DECREF(old); - return 1; - } - } else { - PyMappingMethods *mm = Py_TYPE(o)->tp_as_mapping; - PySequenceMethods *sm = Py_TYPE(o)->tp_as_sequence; - if (mm && mm->mp_ass_subscript) { - int r; - PyObject *key = PyInt_FromSsize_t(i); - if (unlikely(!key)) return -1; - r = mm->mp_ass_subscript(o, key, v); - Py_DECREF(key); - return r; - } - if (likely(sm && sm->sq_ass_item)) { - if (wraparound && unlikely(i < 0) && likely(sm->sq_length)) { - Py_ssize_t l = sm->sq_length(o); - if (likely(l >= 0)) { - i += l; - } else { - if (!PyErr_ExceptionMatches(PyExc_OverflowError)) - return -1; - PyErr_Clear(); - } - } - return sm->sq_ass_item(o, i, v); - } - } -#else -#if CYTHON_COMPILING_IN_PYPY - if (is_list || (PySequence_Check(o) && !PyDict_Check(o))) -#else - if (is_list || PySequence_Check(o)) -#endif - { - return PySequence_SetItem(o, i, v); - } -#endif - return __Pyx_SetItemInt_Generic(o, PyInt_FromSsize_t(i), v); -} - -/* RaiseUnboundLocalError */ -static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varname) { - PyErr_Format(PyExc_UnboundLocalError, "local variable '%s' referenced before assignment", varname); -} - -/* DivInt[long] */ -static CYTHON_INLINE long __Pyx_div_long(long a, long b) { - long q = a / b; - long r = a - q*b; - q -= ((r != 0) & ((r ^ b) < 0)); - return q; -} - -/* ImportFrom */ -static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) { - PyObject* value = __Pyx_PyObject_GetAttrStr(module, name); - if (unlikely(!value) && PyErr_ExceptionMatches(PyExc_AttributeError)) { - const char* module_name_str = 0; - PyObject* module_name = 0; - PyObject* module_dot = 0; - PyObject* full_name = 0; - PyErr_Clear(); - module_name_str = PyModule_GetName(module); - if (unlikely(!module_name_str)) { goto modbad; } - module_name = PyUnicode_FromString(module_name_str); - if (unlikely(!module_name)) { goto modbad; } - module_dot = PyUnicode_Concat(module_name, __pyx_kp_u__2); - if (unlikely(!module_dot)) { goto modbad; } - full_name = PyUnicode_Concat(module_dot, name); - if (unlikely(!full_name)) { goto modbad; } - #if PY_VERSION_HEX < 0x030700A1 || (CYTHON_COMPILING_IN_PYPY && PYPY_VERSION_NUM < 0x07030400) - { - PyObject *modules = PyImport_GetModuleDict(); - if (unlikely(!modules)) - goto modbad; - value = PyObject_GetItem(modules, full_name); - } - #else - value = PyImport_GetModule(full_name); - #endif - modbad: - Py_XDECREF(full_name); - Py_XDECREF(module_dot); - Py_XDECREF(module_name); - } - if (unlikely(!value)) { - PyErr_Format(PyExc_ImportError, - #if PY_MAJOR_VERSION < 3 - "cannot import name %.230s", PyString_AS_STRING(name)); - #else - "cannot import name %S", name); - #endif - } - return value; -} - -/* HasAttr */ -static CYTHON_INLINE int __Pyx_HasAttr(PyObject *o, PyObject *n) { - PyObject *r; - if (unlikely(!__Pyx_PyBaseString_Check(n))) { - PyErr_SetString(PyExc_TypeError, - "hasattr(): attribute name must be string"); - return -1; - } - r = __Pyx_GetAttr(o, n); - if (!r) { - PyErr_Clear(); - return 0; - } else { - Py_DECREF(r); - return 1; - } -} - -/* pep479 */ -static void __Pyx_Generator_Replace_StopIteration(int in_async_gen) { - PyObject *exc, *val, *tb, *cur_exc; - __Pyx_PyThreadState_declare - #ifdef __Pyx_StopAsyncIteration_USED - int is_async_stopiteration = 0; - #endif - CYTHON_MAYBE_UNUSED_VAR(in_async_gen); - cur_exc = PyErr_Occurred(); - if (likely(!__Pyx_PyErr_GivenExceptionMatches(cur_exc, PyExc_StopIteration))) { - #ifdef __Pyx_StopAsyncIteration_USED - if (in_async_gen && unlikely(__Pyx_PyErr_GivenExceptionMatches(cur_exc, __Pyx_PyExc_StopAsyncIteration))) { - is_async_stopiteration = 1; - } else - #endif - return; - } - __Pyx_PyThreadState_assign - __Pyx_GetException(&exc, &val, &tb); - Py_XDECREF(exc); - Py_XDECREF(val); - Py_XDECREF(tb); - PyErr_SetString(PyExc_RuntimeError, - #ifdef __Pyx_StopAsyncIteration_USED - is_async_stopiteration ? "async generator raised StopAsyncIteration" : - in_async_gen ? "async generator raised StopIteration" : - #endif - "generator raised StopIteration"); -} - -/* RaiseKeywordRequired */ -static void __Pyx_RaiseKeywordRequired(const char* func_name, PyObject* kw_name) { - PyErr_Format(PyExc_TypeError, - #if PY_MAJOR_VERSION >= 3 - "%s() needs keyword-only argument %U", func_name, kw_name); - #else - "%s() needs keyword-only argument %s", func_name, - PyString_AS_STRING(kw_name)); - #endif -} - -/* PyIntCompare */ -static CYTHON_INLINE int __Pyx_PyInt_BoolEqObjC(PyObject *op1, PyObject *op2, long intval, long inplace) { - CYTHON_MAYBE_UNUSED_VAR(intval); - CYTHON_UNUSED_VAR(inplace); - if (op1 == op2) { - return 1; - } - #if PY_MAJOR_VERSION < 3 - if (likely(PyInt_CheckExact(op1))) { - const long b = intval; - long a = PyInt_AS_LONG(op1); - return (a == b); - } - #endif - #if CYTHON_USE_PYLONG_INTERNALS - if (likely(PyLong_CheckExact(op1))) { - int unequal; - unsigned long uintval; - Py_ssize_t size = __Pyx_PyLong_DigitCount(op1); - const digit* digits = __Pyx_PyLong_Digits(op1); - if (intval == 0) { - return (__Pyx_PyLong_IsZero(op1) == 1); - } else if (intval < 0) { - if (__Pyx_PyLong_IsNonNeg(op1)) - return 0; - intval = -intval; - } else { - if (__Pyx_PyLong_IsNeg(op1)) - return 0; - } - uintval = (unsigned long) intval; -#if PyLong_SHIFT * 4 < SIZEOF_LONG*8 - if (uintval >> (PyLong_SHIFT * 4)) { - unequal = (size != 5) || (digits[0] != (uintval & (unsigned long) PyLong_MASK)) - | (digits[1] != ((uintval >> (1 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)) | (digits[2] != ((uintval >> (2 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)) | (digits[3] != ((uintval >> (3 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)) | (digits[4] != ((uintval >> (4 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)); - } else -#endif -#if PyLong_SHIFT * 3 < SIZEOF_LONG*8 - if (uintval >> (PyLong_SHIFT * 3)) { - unequal = (size != 4) || (digits[0] != (uintval & (unsigned long) PyLong_MASK)) - | (digits[1] != ((uintval >> (1 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)) | (digits[2] != ((uintval >> (2 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)) | (digits[3] != ((uintval >> (3 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)); - } else -#endif -#if PyLong_SHIFT * 2 < SIZEOF_LONG*8 - if (uintval >> (PyLong_SHIFT * 2)) { - unequal = (size != 3) || (digits[0] != (uintval & (unsigned long) PyLong_MASK)) - | (digits[1] != ((uintval >> (1 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)) | (digits[2] != ((uintval >> (2 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)); - } else -#endif -#if PyLong_SHIFT * 1 < SIZEOF_LONG*8 - if (uintval >> (PyLong_SHIFT * 1)) { - unequal = (size != 2) || (digits[0] != (uintval & (unsigned long) PyLong_MASK)) - | (digits[1] != ((uintval >> (1 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)); - } else -#endif - unequal = (size != 1) || (((unsigned long) digits[0]) != (uintval & (unsigned long) PyLong_MASK)); - return (unequal == 0); - } - #endif - if (PyFloat_CheckExact(op1)) { - const long b = intval; -#if CYTHON_COMPILING_IN_LIMITED_API - double a = __pyx_PyFloat_AsDouble(op1); -#else - double a = PyFloat_AS_DOUBLE(op1); -#endif - return ((double)a == (double)b); - } - return __Pyx_PyObject_IsTrueAndDecref( - PyObject_RichCompare(op1, op2, Py_EQ)); -} - -/* BufferIndexError */ -static void __Pyx_RaiseBufferIndexError(int axis) { - PyErr_Format(PyExc_IndexError, - "Out of bounds on buffer access (axis %d)", axis); -} - -/* PyObject_GenericGetAttrNoDict */ -#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 -static PyObject *__Pyx_RaiseGenericGetAttributeError(PyTypeObject *tp, PyObject *attr_name) { - __Pyx_TypeName type_name = __Pyx_PyType_GetName(tp); - PyErr_Format(PyExc_AttributeError, -#if PY_MAJOR_VERSION >= 3 - "'" __Pyx_FMT_TYPENAME "' object has no attribute '%U'", - type_name, attr_name); -#else - "'" __Pyx_FMT_TYPENAME "' object has no attribute '%.400s'", - type_name, PyString_AS_STRING(attr_name)); -#endif - __Pyx_DECREF_TypeName(type_name); - return NULL; -} -static CYTHON_INLINE PyObject* __Pyx_PyObject_GenericGetAttrNoDict(PyObject* obj, PyObject* attr_name) { - PyObject *descr; - PyTypeObject *tp = Py_TYPE(obj); - if (unlikely(!PyString_Check(attr_name))) { - return PyObject_GenericGetAttr(obj, attr_name); - } - assert(!tp->tp_dictoffset); - descr = _PyType_Lookup(tp, attr_name); - if (unlikely(!descr)) { - return __Pyx_RaiseGenericGetAttributeError(tp, attr_name); - } - Py_INCREF(descr); - #if PY_MAJOR_VERSION < 3 - if (likely(PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_HAVE_CLASS))) - #endif - { - descrgetfunc f = Py_TYPE(descr)->tp_descr_get; - if (unlikely(f)) { - PyObject *res = f(descr, obj, (PyObject *)tp); - Py_DECREF(descr); - return res; - } - } - return descr; -} -#endif - -/* PyObject_GenericGetAttr */ -#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000 -static PyObject* __Pyx_PyObject_GenericGetAttr(PyObject* obj, PyObject* attr_name) { - if (unlikely(Py_TYPE(obj)->tp_dictoffset)) { - return PyObject_GenericGetAttr(obj, attr_name); - } - return __Pyx_PyObject_GenericGetAttrNoDict(obj, attr_name); -} -#endif - -/* FixUpExtensionType */ -#if CYTHON_USE_TYPE_SPECS -static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type) { -#if PY_VERSION_HEX > 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - CYTHON_UNUSED_VAR(spec); - CYTHON_UNUSED_VAR(type); -#else - const PyType_Slot *slot = spec->slots; - while (slot && slot->slot && slot->slot != Py_tp_members) - slot++; - if (slot && slot->slot == Py_tp_members) { - int changed = 0; -#if !(PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON) - const -#endif - PyMemberDef *memb = (PyMemberDef*) slot->pfunc; - while (memb && memb->name) { - if (memb->name[0] == '_' && memb->name[1] == '_') { -#if PY_VERSION_HEX < 0x030900b1 - if (strcmp(memb->name, "__weaklistoffset__") == 0) { - assert(memb->type == T_PYSSIZET); - assert(memb->flags == READONLY); - type->tp_weaklistoffset = memb->offset; - changed = 1; - } - else if (strcmp(memb->name, "__dictoffset__") == 0) { - assert(memb->type == T_PYSSIZET); - assert(memb->flags == READONLY); - type->tp_dictoffset = memb->offset; - changed = 1; - } -#if CYTHON_METH_FASTCALL - else if (strcmp(memb->name, "__vectorcalloffset__") == 0) { - assert(memb->type == T_PYSSIZET); - assert(memb->flags == READONLY); -#if PY_VERSION_HEX >= 0x030800b4 - type->tp_vectorcall_offset = memb->offset; -#else - type->tp_print = (printfunc) memb->offset; -#endif - changed = 1; - } -#endif -#else - if ((0)); -#endif -#if PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON - else if (strcmp(memb->name, "__module__") == 0) { - PyObject *descr; - assert(memb->type == T_OBJECT); - assert(memb->flags == 0 || memb->flags == READONLY); - descr = PyDescr_NewMember(type, memb); - if (unlikely(!descr)) - return -1; - if (unlikely(PyDict_SetItem(type->tp_dict, PyDescr_NAME(descr), descr) < 0)) { - Py_DECREF(descr); - return -1; - } - Py_DECREF(descr); - changed = 1; - } -#endif - } - memb++; - } - if (changed) - PyType_Modified(type); - } -#endif - return 0; -} -#endif - -/* PyObjectCallNoArg */ -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) { - PyObject *arg = NULL; - return __Pyx_PyObject_FastCall(func, (&arg)+1, 0 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); -} - -/* PyObjectGetMethod */ -static int __Pyx_PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method) { - PyObject *attr; -#if CYTHON_UNPACK_METHODS && CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_PYTYPE_LOOKUP - __Pyx_TypeName type_name; - PyTypeObject *tp = Py_TYPE(obj); - PyObject *descr; - descrgetfunc f = NULL; - PyObject **dictptr, *dict; - int meth_found = 0; - assert (*method == NULL); - if (unlikely(tp->tp_getattro != PyObject_GenericGetAttr)) { - attr = __Pyx_PyObject_GetAttrStr(obj, name); - goto try_unpack; - } - if (unlikely(tp->tp_dict == NULL) && unlikely(PyType_Ready(tp) < 0)) { - return 0; - } - descr = _PyType_Lookup(tp, name); - if (likely(descr != NULL)) { - Py_INCREF(descr); -#if defined(Py_TPFLAGS_METHOD_DESCRIPTOR) && Py_TPFLAGS_METHOD_DESCRIPTOR - if (__Pyx_PyType_HasFeature(Py_TYPE(descr), Py_TPFLAGS_METHOD_DESCRIPTOR)) -#elif PY_MAJOR_VERSION >= 3 - #ifdef __Pyx_CyFunction_USED - if (likely(PyFunction_Check(descr) || __Pyx_IS_TYPE(descr, &PyMethodDescr_Type) || __Pyx_CyFunction_Check(descr))) - #else - if (likely(PyFunction_Check(descr) || __Pyx_IS_TYPE(descr, &PyMethodDescr_Type))) - #endif -#else - #ifdef __Pyx_CyFunction_USED - if (likely(PyFunction_Check(descr) || __Pyx_CyFunction_Check(descr))) - #else - if (likely(PyFunction_Check(descr))) - #endif -#endif - { - meth_found = 1; - } else { - f = Py_TYPE(descr)->tp_descr_get; - if (f != NULL && PyDescr_IsData(descr)) { - attr = f(descr, obj, (PyObject *)Py_TYPE(obj)); - Py_DECREF(descr); - goto try_unpack; - } - } - } - dictptr = _PyObject_GetDictPtr(obj); - if (dictptr != NULL && (dict = *dictptr) != NULL) { - Py_INCREF(dict); - attr = __Pyx_PyDict_GetItemStr(dict, name); - if (attr != NULL) { - Py_INCREF(attr); - Py_DECREF(dict); - Py_XDECREF(descr); - goto try_unpack; - } - Py_DECREF(dict); - } - if (meth_found) { - *method = descr; - return 1; - } - if (f != NULL) { - attr = f(descr, obj, (PyObject *)Py_TYPE(obj)); - Py_DECREF(descr); - goto try_unpack; - } - if (likely(descr != NULL)) { - *method = descr; - return 0; - } - type_name = __Pyx_PyType_GetName(tp); - PyErr_Format(PyExc_AttributeError, -#if PY_MAJOR_VERSION >= 3 - "'" __Pyx_FMT_TYPENAME "' object has no attribute '%U'", - type_name, name); -#else - "'" __Pyx_FMT_TYPENAME "' object has no attribute '%.400s'", - type_name, PyString_AS_STRING(name)); -#endif - __Pyx_DECREF_TypeName(type_name); - return 0; -#else - attr = __Pyx_PyObject_GetAttrStr(obj, name); - goto try_unpack; -#endif -try_unpack: -#if CYTHON_UNPACK_METHODS - if (likely(attr) && PyMethod_Check(attr) && likely(PyMethod_GET_SELF(attr) == obj)) { - PyObject *function = PyMethod_GET_FUNCTION(attr); - Py_INCREF(function); - Py_DECREF(attr); - *method = function; - return 1; - } -#endif - *method = attr; - return 0; -} - -/* PyObjectCallMethod0 */ -static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name) { - PyObject *method = NULL, *result = NULL; - int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method); - if (likely(is_method)) { - result = __Pyx_PyObject_CallOneArg(method, obj); - Py_DECREF(method); - return result; - } - if (unlikely(!method)) goto bad; - result = __Pyx_PyObject_CallNoArg(method); - Py_DECREF(method); -bad: - return result; -} - -/* ValidateBasesTuple */ -#if CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API || CYTHON_USE_TYPE_SPECS -static int __Pyx_validate_bases_tuple(const char *type_name, Py_ssize_t dictoffset, PyObject *bases) { - Py_ssize_t i, n; -#if CYTHON_ASSUME_SAFE_MACROS - n = PyTuple_GET_SIZE(bases); -#else - n = PyTuple_Size(bases); - if (n < 0) return -1; -#endif - for (i = 1; i < n; i++) - { -#if CYTHON_AVOID_BORROWED_REFS - PyObject *b0 = PySequence_GetItem(bases, i); - if (!b0) return -1; -#elif CYTHON_ASSUME_SAFE_MACROS - PyObject *b0 = PyTuple_GET_ITEM(bases, i); -#else - PyObject *b0 = PyTuple_GetItem(bases, i); - if (!b0) return -1; -#endif - PyTypeObject *b; -#if PY_MAJOR_VERSION < 3 - if (PyClass_Check(b0)) - { - PyErr_Format(PyExc_TypeError, "base class '%.200s' is an old-style class", - PyString_AS_STRING(((PyClassObject*)b0)->cl_name)); -#if CYTHON_AVOID_BORROWED_REFS - Py_DECREF(b0); -#endif - return -1; - } -#endif - b = (PyTypeObject*) b0; - if (!__Pyx_PyType_HasFeature(b, Py_TPFLAGS_HEAPTYPE)) - { - __Pyx_TypeName b_name = __Pyx_PyType_GetName(b); - PyErr_Format(PyExc_TypeError, - "base class '" __Pyx_FMT_TYPENAME "' is not a heap type", b_name); - __Pyx_DECREF_TypeName(b_name); -#if CYTHON_AVOID_BORROWED_REFS - Py_DECREF(b0); -#endif - return -1; - } -#if !CYTHON_USE_TYPE_SLOTS - if (dictoffset == 0) { - PyErr_Format(PyExc_TypeError, - "extension type '%s.200s': " - "unable to validate whether bases have a __dict__ " - "when CYTHON_USE_TYPE_SLOTS is off " - "(likely because you are building in the limited API). " - "Therefore, all extension types with multiple bases " - "must add 'cdef dict __dict__' in this compilation mode", - type_name); -#if CYTHON_AVOID_BORROWED_REFS - Py_DECREF(b0); -#endif - return -1; - } -#else - if (dictoffset == 0 && b->tp_dictoffset) - { - __Pyx_TypeName b_name = __Pyx_PyType_GetName(b); - PyErr_Format(PyExc_TypeError, - "extension type '%.200s' has no __dict__ slot, " - "but base type '" __Pyx_FMT_TYPENAME "' has: " - "either add 'cdef dict __dict__' to the extension type " - "or add '__slots__ = [...]' to the base type", - type_name, b_name); - __Pyx_DECREF_TypeName(b_name); -#if CYTHON_AVOID_BORROWED_REFS - Py_DECREF(b0); -#endif - return -1; - } -#endif -#if CYTHON_AVOID_BORROWED_REFS - Py_DECREF(b0); -#endif - } - return 0; -} -#endif - -/* PyType_Ready */ -static int __Pyx_PyType_Ready(PyTypeObject *t) { -#if CYTHON_USE_TYPE_SPECS || !(CYTHON_COMPILING_IN_CPYTHON || CYTHON_COMPILING_IN_LIMITED_API) || defined(PYSTON_MAJOR_VERSION) - (void)__Pyx_PyObject_CallMethod0; -#if CYTHON_USE_TYPE_SPECS - (void)__Pyx_validate_bases_tuple; -#endif - return PyType_Ready(t); -#else - int r; - PyObject *bases = __Pyx_PyType_GetSlot(t, tp_bases, PyObject*); - if (bases && unlikely(__Pyx_validate_bases_tuple(t->tp_name, t->tp_dictoffset, bases) == -1)) - return -1; -#if PY_VERSION_HEX >= 0x03050000 && !defined(PYSTON_MAJOR_VERSION) - { - int gc_was_enabled; - #if PY_VERSION_HEX >= 0x030A00b1 - gc_was_enabled = PyGC_Disable(); - (void)__Pyx_PyObject_CallMethod0; - #else - PyObject *ret, *py_status; - PyObject *gc = NULL; - #if PY_VERSION_HEX >= 0x030700a1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM+0 >= 0x07030400) - gc = PyImport_GetModule(__pyx_kp_u_gc); - #endif - if (unlikely(!gc)) gc = PyImport_Import(__pyx_kp_u_gc); - if (unlikely(!gc)) return -1; - py_status = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_isenabled); - if (unlikely(!py_status)) { - Py_DECREF(gc); - return -1; - } - gc_was_enabled = __Pyx_PyObject_IsTrue(py_status); - Py_DECREF(py_status); - if (gc_was_enabled > 0) { - ret = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_disable); - if (unlikely(!ret)) { - Py_DECREF(gc); - return -1; - } - Py_DECREF(ret); - } else if (unlikely(gc_was_enabled == -1)) { - Py_DECREF(gc); - return -1; - } - #endif - t->tp_flags |= Py_TPFLAGS_HEAPTYPE; -#if PY_VERSION_HEX >= 0x030A0000 - t->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE; -#endif -#else - (void)__Pyx_PyObject_CallMethod0; -#endif - r = PyType_Ready(t); -#if PY_VERSION_HEX >= 0x03050000 && !defined(PYSTON_MAJOR_VERSION) - t->tp_flags &= ~Py_TPFLAGS_HEAPTYPE; - #if PY_VERSION_HEX >= 0x030A00b1 - if (gc_was_enabled) - PyGC_Enable(); - #else - if (gc_was_enabled) { - PyObject *tp, *v, *tb; - PyErr_Fetch(&tp, &v, &tb); - ret = __Pyx_PyObject_CallMethod0(gc, __pyx_kp_u_enable); - if (likely(ret || r == -1)) { - Py_XDECREF(ret); - PyErr_Restore(tp, v, tb); - } else { - Py_XDECREF(tp); - Py_XDECREF(v); - Py_XDECREF(tb); - r = -1; - } - } - Py_DECREF(gc); - #endif - } -#endif - return r; -#endif -} - -/* SetVTable */ -static int __Pyx_SetVtable(PyTypeObject *type, void *vtable) { - PyObject *ob = PyCapsule_New(vtable, 0, 0); - if (unlikely(!ob)) - goto bad; -#if CYTHON_COMPILING_IN_LIMITED_API - if (unlikely(PyObject_SetAttr((PyObject *) type, __pyx_n_s_pyx_vtable, ob) < 0)) -#else - if (unlikely(PyDict_SetItem(type->tp_dict, __pyx_n_s_pyx_vtable, ob) < 0)) -#endif - goto bad; - Py_DECREF(ob); - return 0; -bad: - Py_XDECREF(ob); - return -1; -} - -/* GetVTable */ -static void* __Pyx_GetVtable(PyTypeObject *type) { - void* ptr; -#if CYTHON_COMPILING_IN_LIMITED_API - PyObject *ob = PyObject_GetAttr((PyObject *)type, __pyx_n_s_pyx_vtable); -#else - PyObject *ob = PyObject_GetItem(type->tp_dict, __pyx_n_s_pyx_vtable); -#endif - if (!ob) - goto bad; - ptr = PyCapsule_GetPointer(ob, 0); - if (!ptr && !PyErr_Occurred()) - PyErr_SetString(PyExc_RuntimeError, "invalid vtable found for imported type"); - Py_DECREF(ob); - return ptr; -bad: - Py_XDECREF(ob); - return NULL; -} - -/* MergeVTables */ -#if !CYTHON_COMPILING_IN_LIMITED_API -static int __Pyx_MergeVtables(PyTypeObject *type) { - int i; - void** base_vtables; - __Pyx_TypeName tp_base_name; - __Pyx_TypeName base_name; - void* unknown = (void*)-1; - PyObject* bases = type->tp_bases; - int base_depth = 0; - { - PyTypeObject* base = type->tp_base; - while (base) { - base_depth += 1; - base = base->tp_base; - } - } - base_vtables = (void**) malloc(sizeof(void*) * (size_t)(base_depth + 1)); - base_vtables[0] = unknown; - for (i = 1; i < PyTuple_GET_SIZE(bases); i++) { - void* base_vtable = __Pyx_GetVtable(((PyTypeObject*)PyTuple_GET_ITEM(bases, i))); - if (base_vtable != NULL) { - int j; - PyTypeObject* base = type->tp_base; - for (j = 0; j < base_depth; j++) { - if (base_vtables[j] == unknown) { - base_vtables[j] = __Pyx_GetVtable(base); - base_vtables[j + 1] = unknown; - } - if (base_vtables[j] == base_vtable) { - break; - } else if (base_vtables[j] == NULL) { - goto bad; - } - base = base->tp_base; - } - } - } - PyErr_Clear(); - free(base_vtables); - return 0; -bad: - tp_base_name = __Pyx_PyType_GetName(type->tp_base); - base_name = __Pyx_PyType_GetName((PyTypeObject*)PyTuple_GET_ITEM(bases, i)); - PyErr_Format(PyExc_TypeError, - "multiple bases have vtable conflict: '" __Pyx_FMT_TYPENAME "' and '" __Pyx_FMT_TYPENAME "'", tp_base_name, base_name); - __Pyx_DECREF_TypeName(tp_base_name); - __Pyx_DECREF_TypeName(base_name); - free(base_vtables); - return -1; -} -#endif - -/* SetupReduce */ -#if !CYTHON_COMPILING_IN_LIMITED_API -static int __Pyx_setup_reduce_is_named(PyObject* meth, PyObject* name) { - int ret; - PyObject *name_attr; - name_attr = __Pyx_PyObject_GetAttrStrNoError(meth, __pyx_n_s_name_2); - if (likely(name_attr)) { - ret = PyObject_RichCompareBool(name_attr, name, Py_EQ); - } else { - ret = -1; - } - if (unlikely(ret < 0)) { - PyErr_Clear(); - ret = 0; - } - Py_XDECREF(name_attr); - return ret; -} -static int __Pyx_setup_reduce(PyObject* type_obj) { - int ret = 0; - PyObject *object_reduce = NULL; - PyObject *object_getstate = NULL; - PyObject *object_reduce_ex = NULL; - PyObject *reduce = NULL; - PyObject *reduce_ex = NULL; - PyObject *reduce_cython = NULL; - PyObject *setstate = NULL; - PyObject *setstate_cython = NULL; - PyObject *getstate = NULL; -#if CYTHON_USE_PYTYPE_LOOKUP - getstate = _PyType_Lookup((PyTypeObject*)type_obj, __pyx_n_s_getstate); -#else - getstate = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_getstate); - if (!getstate && PyErr_Occurred()) { - goto __PYX_BAD; - } -#endif - if (getstate) { -#if CYTHON_USE_PYTYPE_LOOKUP - object_getstate = _PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_getstate); -#else - object_getstate = __Pyx_PyObject_GetAttrStrNoError((PyObject*)&PyBaseObject_Type, __pyx_n_s_getstate); - if (!object_getstate && PyErr_Occurred()) { - goto __PYX_BAD; - } -#endif - if (object_getstate != getstate) { - goto __PYX_GOOD; - } - } -#if CYTHON_USE_PYTYPE_LOOKUP - object_reduce_ex = _PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_reduce_ex); if (!object_reduce_ex) goto __PYX_BAD; -#else - object_reduce_ex = __Pyx_PyObject_GetAttrStr((PyObject*)&PyBaseObject_Type, __pyx_n_s_reduce_ex); if (!object_reduce_ex) goto __PYX_BAD; -#endif - reduce_ex = __Pyx_PyObject_GetAttrStr(type_obj, __pyx_n_s_reduce_ex); if (unlikely(!reduce_ex)) goto __PYX_BAD; - if (reduce_ex == object_reduce_ex) { -#if CYTHON_USE_PYTYPE_LOOKUP - object_reduce = _PyType_Lookup(&PyBaseObject_Type, __pyx_n_s_reduce); if (!object_reduce) goto __PYX_BAD; -#else - object_reduce = __Pyx_PyObject_GetAttrStr((PyObject*)&PyBaseObject_Type, __pyx_n_s_reduce); if (!object_reduce) goto __PYX_BAD; -#endif - reduce = __Pyx_PyObject_GetAttrStr(type_obj, __pyx_n_s_reduce); if (unlikely(!reduce)) goto __PYX_BAD; - if (reduce == object_reduce || __Pyx_setup_reduce_is_named(reduce, __pyx_n_s_reduce_cython)) { - reduce_cython = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_reduce_cython); - if (likely(reduce_cython)) { - ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_reduce, reduce_cython); if (unlikely(ret < 0)) goto __PYX_BAD; - ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_reduce_cython); if (unlikely(ret < 0)) goto __PYX_BAD; - } else if (reduce == object_reduce || PyErr_Occurred()) { - goto __PYX_BAD; - } - setstate = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_setstate); - if (!setstate) PyErr_Clear(); - if (!setstate || __Pyx_setup_reduce_is_named(setstate, __pyx_n_s_setstate_cython)) { - setstate_cython = __Pyx_PyObject_GetAttrStrNoError(type_obj, __pyx_n_s_setstate_cython); - if (likely(setstate_cython)) { - ret = PyDict_SetItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_setstate, setstate_cython); if (unlikely(ret < 0)) goto __PYX_BAD; - ret = PyDict_DelItem(((PyTypeObject*)type_obj)->tp_dict, __pyx_n_s_setstate_cython); if (unlikely(ret < 0)) goto __PYX_BAD; - } else if (!setstate || PyErr_Occurred()) { - goto __PYX_BAD; - } - } - PyType_Modified((PyTypeObject*)type_obj); - } - } - goto __PYX_GOOD; -__PYX_BAD: - if (!PyErr_Occurred()) { - __Pyx_TypeName type_obj_name = - __Pyx_PyType_GetName((PyTypeObject*)type_obj); - PyErr_Format(PyExc_RuntimeError, - "Unable to initialize pickling for " __Pyx_FMT_TYPENAME, type_obj_name); - __Pyx_DECREF_TypeName(type_obj_name); - } - ret = -1; -__PYX_GOOD: -#if !CYTHON_USE_PYTYPE_LOOKUP - Py_XDECREF(object_reduce); - Py_XDECREF(object_reduce_ex); - Py_XDECREF(object_getstate); - Py_XDECREF(getstate); -#endif - Py_XDECREF(reduce); - Py_XDECREF(reduce_ex); - Py_XDECREF(reduce_cython); - Py_XDECREF(setstate); - Py_XDECREF(setstate_cython); - return ret; -} -#endif - -/* TypeImport */ -#ifndef __PYX_HAVE_RT_ImportType_3_0_2 -#define __PYX_HAVE_RT_ImportType_3_0_2 -static PyTypeObject *__Pyx_ImportType_3_0_2(PyObject *module, const char *module_name, const char *class_name, - size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_0_2 check_size) -{ - PyObject *result = 0; - char warning[200]; - Py_ssize_t basicsize; - Py_ssize_t itemsize; -#if CYTHON_COMPILING_IN_LIMITED_API - PyObject *py_basicsize; - PyObject *py_itemsize; -#endif - result = PyObject_GetAttrString(module, class_name); - if (!result) - goto bad; - if (!PyType_Check(result)) { - PyErr_Format(PyExc_TypeError, - "%.200s.%.200s is not a type object", - module_name, class_name); - goto bad; - } -#if !CYTHON_COMPILING_IN_LIMITED_API - basicsize = ((PyTypeObject *)result)->tp_basicsize; - itemsize = ((PyTypeObject *)result)->tp_itemsize; -#else - py_basicsize = PyObject_GetAttrString(result, "__basicsize__"); - if (!py_basicsize) - goto bad; - basicsize = PyLong_AsSsize_t(py_basicsize); - Py_DECREF(py_basicsize); - py_basicsize = 0; - if (basicsize == (Py_ssize_t)-1 && PyErr_Occurred()) - goto bad; - py_itemsize = PyObject_GetAttrString(result, "__itemsize__"); - if (!py_itemsize) - goto bad; - itemsize = PyLong_AsSsize_t(py_itemsize); - Py_DECREF(py_itemsize); - py_itemsize = 0; - if (itemsize == (Py_ssize_t)-1 && PyErr_Occurred()) - goto bad; -#endif - if (itemsize) { - if (size % alignment) { - alignment = size % alignment; - } - if (itemsize < (Py_ssize_t)alignment) - itemsize = (Py_ssize_t)alignment; - } - if ((size_t)(basicsize + itemsize) < size) { - PyErr_Format(PyExc_ValueError, - "%.200s.%.200s size changed, may indicate binary incompatibility. " - "Expected %zd from C header, got %zd from PyObject", - module_name, class_name, size, basicsize+itemsize); - goto bad; - } - if (check_size == __Pyx_ImportType_CheckSize_Error_3_0_2 && - ((size_t)basicsize > size || (size_t)(basicsize + itemsize) < size)) { - PyErr_Format(PyExc_ValueError, - "%.200s.%.200s size changed, may indicate binary incompatibility. " - "Expected %zd from C header, got %zd-%zd from PyObject", - module_name, class_name, size, basicsize, basicsize+itemsize); - goto bad; - } - else if (check_size == __Pyx_ImportType_CheckSize_Warn_3_0_2 && (size_t)basicsize > size) { - PyOS_snprintf(warning, sizeof(warning), - "%s.%s size changed, may indicate binary incompatibility. " - "Expected %zd from C header, got %zd from PyObject", - module_name, class_name, size, basicsize); - if (PyErr_WarnEx(NULL, warning, 0) < 0) goto bad; - } - return (PyTypeObject *)result; -bad: - Py_XDECREF(result); - return NULL; -} -#endif - -/* CIntToPyUnicode */ -static CYTHON_INLINE PyObject* __Pyx_PyUnicode_From___pyx_anon_enum(int value, Py_ssize_t width, char padding_char, char format_char) { - char digits[sizeof(int)*3+2]; - char *dpos, *end = digits + sizeof(int)*3+2; - const char *hex_digits = DIGITS_HEX; - Py_ssize_t length, ulength; - int prepend_sign, last_one_off; - int remaining; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const int neg_one = (int) -1, const_zero = (int) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; - if (format_char == 'X') { - hex_digits += 16; - format_char = 'x'; - } - remaining = value; - last_one_off = 0; - dpos = end; - do { - int digit_pos; - switch (format_char) { - case 'o': - digit_pos = abs((int)(remaining % (8*8))); - remaining = (int) (remaining / (8*8)); - dpos -= 2; - memcpy(dpos, DIGIT_PAIRS_8 + digit_pos * 2, 2); - last_one_off = (digit_pos < 8); - break; - case 'd': - digit_pos = abs((int)(remaining % (10*10))); - remaining = (int) (remaining / (10*10)); - dpos -= 2; - memcpy(dpos, DIGIT_PAIRS_10 + digit_pos * 2, 2); - last_one_off = (digit_pos < 10); - break; - case 'x': - *(--dpos) = hex_digits[abs((int)(remaining % 16))]; - remaining = (int) (remaining / 16); - break; - default: - assert(0); - break; - } - } while (unlikely(remaining != 0)); - assert(!last_one_off || *dpos == '0'); - dpos += last_one_off; - length = end - dpos; - ulength = length; - prepend_sign = 0; - if (!is_unsigned && value <= neg_one) { - if (padding_char == ' ' || width <= length + 1) { - *(--dpos) = '-'; - ++length; - } else { - prepend_sign = 1; - } - ++ulength; - } - if (width > ulength) { - ulength = width; - } - if (ulength == 1) { - return PyUnicode_FromOrdinal(*dpos); - } - return __Pyx_PyUnicode_BuildFromAscii(ulength, dpos, (int) length, prepend_sign, padding_char); -} - -/* FetchSharedCythonModule */ -static PyObject *__Pyx_FetchSharedCythonABIModule(void) { - PyObject *abi_module = PyImport_AddModule((char*) __PYX_ABI_MODULE_NAME); - if (unlikely(!abi_module)) return NULL; - Py_INCREF(abi_module); - return abi_module; -} - -/* FetchCommonType */ -static int __Pyx_VerifyCachedType(PyObject *cached_type, - const char *name, - Py_ssize_t basicsize, - Py_ssize_t expected_basicsize) { - if (!PyType_Check(cached_type)) { - PyErr_Format(PyExc_TypeError, - "Shared Cython type %.200s is not a type object", name); - return -1; - } - if (basicsize != expected_basicsize) { - PyErr_Format(PyExc_TypeError, - "Shared Cython type %.200s has the wrong size, try recompiling", - name); - return -1; - } - return 0; -} -#if !CYTHON_USE_TYPE_SPECS -static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type) { - PyObject* abi_module; - const char* object_name; - PyTypeObject *cached_type = NULL; - abi_module = __Pyx_FetchSharedCythonABIModule(); - if (!abi_module) return NULL; - object_name = strrchr(type->tp_name, '.'); - object_name = object_name ? object_name+1 : type->tp_name; - cached_type = (PyTypeObject*) PyObject_GetAttrString(abi_module, object_name); - if (cached_type) { - if (__Pyx_VerifyCachedType( - (PyObject *)cached_type, - object_name, - cached_type->tp_basicsize, - type->tp_basicsize) < 0) { - goto bad; - } - goto done; - } - if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; - PyErr_Clear(); - if (PyType_Ready(type) < 0) goto bad; - if (PyObject_SetAttrString(abi_module, object_name, (PyObject *)type) < 0) - goto bad; - Py_INCREF(type); - cached_type = type; -done: - Py_DECREF(abi_module); - return cached_type; -bad: - Py_XDECREF(cached_type); - cached_type = NULL; - goto done; -} -#else -static PyTypeObject *__Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases) { - PyObject *abi_module, *cached_type = NULL; - const char* object_name = strrchr(spec->name, '.'); - object_name = object_name ? object_name+1 : spec->name; - abi_module = __Pyx_FetchSharedCythonABIModule(); - if (!abi_module) return NULL; - cached_type = PyObject_GetAttrString(abi_module, object_name); - if (cached_type) { - Py_ssize_t basicsize; -#if CYTHON_COMPILING_IN_LIMITED_API - PyObject *py_basicsize; - py_basicsize = PyObject_GetAttrString(cached_type, "__basicsize__"); - if (unlikely(!py_basicsize)) goto bad; - basicsize = PyLong_AsSsize_t(py_basicsize); - Py_DECREF(py_basicsize); - py_basicsize = 0; - if (unlikely(basicsize == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; -#else - basicsize = likely(PyType_Check(cached_type)) ? ((PyTypeObject*) cached_type)->tp_basicsize : -1; -#endif - if (__Pyx_VerifyCachedType( - cached_type, - object_name, - basicsize, - spec->basicsize) < 0) { - goto bad; - } - goto done; - } - if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; - PyErr_Clear(); - CYTHON_UNUSED_VAR(module); - cached_type = __Pyx_PyType_FromModuleAndSpec(abi_module, spec, bases); - if (unlikely(!cached_type)) goto bad; - if (unlikely(__Pyx_fix_up_extension_type_from_spec(spec, (PyTypeObject *) cached_type) < 0)) goto bad; - if (PyObject_SetAttrString(abi_module, object_name, cached_type) < 0) goto bad; -done: - Py_DECREF(abi_module); - assert(cached_type == NULL || PyType_Check(cached_type)); - return (PyTypeObject *) cached_type; -bad: - Py_XDECREF(cached_type); - cached_type = NULL; - goto done; -} -#endif - -/* PyVectorcallFastCallDict */ -#if CYTHON_METH_FASTCALL -static PyObject *__Pyx_PyVectorcall_FastCallDict_kw(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) -{ - PyObject *res = NULL; - PyObject *kwnames; - PyObject **newargs; - PyObject **kwvalues; - Py_ssize_t i, pos; - size_t j; - PyObject *key, *value; - unsigned long keys_are_strings; - Py_ssize_t nkw = PyDict_GET_SIZE(kw); - newargs = (PyObject **)PyMem_Malloc((nargs + (size_t)nkw) * sizeof(args[0])); - if (unlikely(newargs == NULL)) { - PyErr_NoMemory(); - return NULL; - } - for (j = 0; j < nargs; j++) newargs[j] = args[j]; - kwnames = PyTuple_New(nkw); - if (unlikely(kwnames == NULL)) { - PyMem_Free(newargs); - return NULL; - } - kwvalues = newargs + nargs; - pos = i = 0; - keys_are_strings = Py_TPFLAGS_UNICODE_SUBCLASS; - while (PyDict_Next(kw, &pos, &key, &value)) { - keys_are_strings &= Py_TYPE(key)->tp_flags; - Py_INCREF(key); - Py_INCREF(value); - PyTuple_SET_ITEM(kwnames, i, key); - kwvalues[i] = value; - i++; - } - if (unlikely(!keys_are_strings)) { - PyErr_SetString(PyExc_TypeError, "keywords must be strings"); - goto cleanup; - } - res = vc(func, newargs, nargs, kwnames); -cleanup: - Py_DECREF(kwnames); - for (i = 0; i < nkw; i++) - Py_DECREF(kwvalues[i]); - PyMem_Free(newargs); - return res; -} -static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) -{ - if (likely(kw == NULL) || PyDict_GET_SIZE(kw) == 0) { - return vc(func, args, nargs, NULL); - } - return __Pyx_PyVectorcall_FastCallDict_kw(func, vc, args, nargs, kw); -} -#endif - -/* CythonFunctionShared */ -static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj) { -#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - __Pyx_Py_XDECREF_SET( - __Pyx_CyFunction_GetClassObj(f), - ((classobj) ? __Pyx_NewRef(classobj) : NULL)); -#else - __Pyx_Py_XDECREF_SET( - ((PyCMethodObject *) (f))->mm_class, - (PyTypeObject*)((classobj) ? __Pyx_NewRef(classobj) : NULL)); -#endif -} -static PyObject * -__Pyx_CyFunction_get_doc(__pyx_CyFunctionObject *op, void *closure) -{ - CYTHON_UNUSED_VAR(closure); - if (unlikely(op->func_doc == NULL)) { -#if CYTHON_COMPILING_IN_LIMITED_API - op->func_doc = PyObject_GetAttrString(op->func, "__doc__"); - if (unlikely(!op->func_doc)) return NULL; -#else - if (((PyCFunctionObject*)op)->m_ml->ml_doc) { -#if PY_MAJOR_VERSION >= 3 - op->func_doc = PyUnicode_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); -#else - op->func_doc = PyString_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); -#endif - if (unlikely(op->func_doc == NULL)) - return NULL; - } else { - Py_INCREF(Py_None); - return Py_None; - } -#endif - } - Py_INCREF(op->func_doc); - return op->func_doc; -} -static int -__Pyx_CyFunction_set_doc(__pyx_CyFunctionObject *op, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); - if (value == NULL) { - value = Py_None; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->func_doc, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_name(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(context); - if (unlikely(op->func_name == NULL)) { -#if CYTHON_COMPILING_IN_LIMITED_API - op->func_name = PyObject_GetAttrString(op->func, "__name__"); -#elif PY_MAJOR_VERSION >= 3 - op->func_name = PyUnicode_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); -#else - op->func_name = PyString_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); -#endif - if (unlikely(op->func_name == NULL)) - return NULL; - } - Py_INCREF(op->func_name); - return op->func_name; -} -static int -__Pyx_CyFunction_set_name(__pyx_CyFunctionObject *op, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); -#if PY_MAJOR_VERSION >= 3 - if (unlikely(value == NULL || !PyUnicode_Check(value))) -#else - if (unlikely(value == NULL || !PyString_Check(value))) -#endif - { - PyErr_SetString(PyExc_TypeError, - "__name__ must be set to a string object"); - return -1; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->func_name, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_qualname(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(context); - Py_INCREF(op->func_qualname); - return op->func_qualname; -} -static int -__Pyx_CyFunction_set_qualname(__pyx_CyFunctionObject *op, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); -#if PY_MAJOR_VERSION >= 3 - if (unlikely(value == NULL || !PyUnicode_Check(value))) -#else - if (unlikely(value == NULL || !PyString_Check(value))) -#endif - { - PyErr_SetString(PyExc_TypeError, - "__qualname__ must be set to a string object"); - return -1; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->func_qualname, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_dict(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(context); - if (unlikely(op->func_dict == NULL)) { - op->func_dict = PyDict_New(); - if (unlikely(op->func_dict == NULL)) - return NULL; - } - Py_INCREF(op->func_dict); - return op->func_dict; -} -static int -__Pyx_CyFunction_set_dict(__pyx_CyFunctionObject *op, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); - if (unlikely(value == NULL)) { - PyErr_SetString(PyExc_TypeError, - "function's dictionary may not be deleted"); - return -1; - } - if (unlikely(!PyDict_Check(value))) { - PyErr_SetString(PyExc_TypeError, - "setting function's dictionary to a non-dict"); - return -1; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->func_dict, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_globals(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(context); - Py_INCREF(op->func_globals); - return op->func_globals; -} -static PyObject * -__Pyx_CyFunction_get_closure(__pyx_CyFunctionObject *op, void *context) -{ - CYTHON_UNUSED_VAR(op); - CYTHON_UNUSED_VAR(context); - Py_INCREF(Py_None); - return Py_None; -} -static PyObject * -__Pyx_CyFunction_get_code(__pyx_CyFunctionObject *op, void *context) -{ - PyObject* result = (op->func_code) ? op->func_code : Py_None; - CYTHON_UNUSED_VAR(context); - Py_INCREF(result); - return result; -} -static int -__Pyx_CyFunction_init_defaults(__pyx_CyFunctionObject *op) { - int result = 0; - PyObject *res = op->defaults_getter((PyObject *) op); - if (unlikely(!res)) - return -1; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - op->defaults_tuple = PyTuple_GET_ITEM(res, 0); - Py_INCREF(op->defaults_tuple); - op->defaults_kwdict = PyTuple_GET_ITEM(res, 1); - Py_INCREF(op->defaults_kwdict); - #else - op->defaults_tuple = __Pyx_PySequence_ITEM(res, 0); - if (unlikely(!op->defaults_tuple)) result = -1; - else { - op->defaults_kwdict = __Pyx_PySequence_ITEM(res, 1); - if (unlikely(!op->defaults_kwdict)) result = -1; - } - #endif - Py_DECREF(res); - return result; -} -static int -__Pyx_CyFunction_set_defaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { - CYTHON_UNUSED_VAR(context); - if (!value) { - value = Py_None; - } else if (unlikely(value != Py_None && !PyTuple_Check(value))) { - PyErr_SetString(PyExc_TypeError, - "__defaults__ must be set to a tuple object"); - return -1; - } - PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__defaults__ will not " - "currently affect the values used in function calls", 1); - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->defaults_tuple, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_defaults(__pyx_CyFunctionObject *op, void *context) { - PyObject* result = op->defaults_tuple; - CYTHON_UNUSED_VAR(context); - if (unlikely(!result)) { - if (op->defaults_getter) { - if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; - result = op->defaults_tuple; - } else { - result = Py_None; - } - } - Py_INCREF(result); - return result; -} -static int -__Pyx_CyFunction_set_kwdefaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { - CYTHON_UNUSED_VAR(context); - if (!value) { - value = Py_None; - } else if (unlikely(value != Py_None && !PyDict_Check(value))) { - PyErr_SetString(PyExc_TypeError, - "__kwdefaults__ must be set to a dict object"); - return -1; - } - PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__kwdefaults__ will not " - "currently affect the values used in function calls", 1); - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(op->defaults_kwdict, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_kwdefaults(__pyx_CyFunctionObject *op, void *context) { - PyObject* result = op->defaults_kwdict; - CYTHON_UNUSED_VAR(context); - if (unlikely(!result)) { - if (op->defaults_getter) { - if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; - result = op->defaults_kwdict; - } else { - result = Py_None; - } - } - Py_INCREF(result); - return result; -} -static int -__Pyx_CyFunction_set_annotations(__pyx_CyFunctionObject *op, PyObject* value, void *context) { - CYTHON_UNUSED_VAR(context); - if (!value || value == Py_None) { - value = NULL; - } else if (unlikely(!PyDict_Check(value))) { - PyErr_SetString(PyExc_TypeError, - "__annotations__ must be set to a dict object"); - return -1; - } - Py_XINCREF(value); - __Pyx_Py_XDECREF_SET(op->func_annotations, value); - return 0; -} -static PyObject * -__Pyx_CyFunction_get_annotations(__pyx_CyFunctionObject *op, void *context) { - PyObject* result = op->func_annotations; - CYTHON_UNUSED_VAR(context); - if (unlikely(!result)) { - result = PyDict_New(); - if (unlikely(!result)) return NULL; - op->func_annotations = result; - } - Py_INCREF(result); - return result; -} -static PyObject * -__Pyx_CyFunction_get_is_coroutine(__pyx_CyFunctionObject *op, void *context) { - int is_coroutine; - CYTHON_UNUSED_VAR(context); - if (op->func_is_coroutine) { - return __Pyx_NewRef(op->func_is_coroutine); - } - is_coroutine = op->flags & __Pyx_CYFUNCTION_COROUTINE; -#if PY_VERSION_HEX >= 0x03050000 - if (is_coroutine) { - PyObject *module, *fromlist, *marker = __pyx_n_s_is_coroutine; - fromlist = PyList_New(1); - if (unlikely(!fromlist)) return NULL; - Py_INCREF(marker); -#if CYTHON_ASSUME_SAFE_MACROS - PyList_SET_ITEM(fromlist, 0, marker); -#else - if (unlikely(PyList_SetItem(fromlist, 0, marker) < 0)) { - Py_DECREF(marker); - Py_DECREF(fromlist); - return NULL; - } -#endif - module = PyImport_ImportModuleLevelObject(__pyx_n_s_asyncio_coroutines, NULL, NULL, fromlist, 0); - Py_DECREF(fromlist); - if (unlikely(!module)) goto ignore; - op->func_is_coroutine = __Pyx_PyObject_GetAttrStr(module, marker); - Py_DECREF(module); - if (likely(op->func_is_coroutine)) { - return __Pyx_NewRef(op->func_is_coroutine); - } -ignore: - PyErr_Clear(); - } -#endif - op->func_is_coroutine = __Pyx_PyBool_FromLong(is_coroutine); - return __Pyx_NewRef(op->func_is_coroutine); -} -#if CYTHON_COMPILING_IN_LIMITED_API -static PyObject * -__Pyx_CyFunction_get_module(__pyx_CyFunctionObject *op, void *context) { - CYTHON_UNUSED_VAR(context); - return PyObject_GetAttrString(op->func, "__module__"); -} -static int -__Pyx_CyFunction_set_module(__pyx_CyFunctionObject *op, PyObject* value, void *context) { - CYTHON_UNUSED_VAR(context); - return PyObject_SetAttrString(op->func, "__module__", value); -} -#endif -static PyGetSetDef __pyx_CyFunction_getsets[] = { - {(char *) "func_doc", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, - {(char *) "__doc__", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, - {(char *) "func_name", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, - {(char *) "__name__", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, - {(char *) "__qualname__", (getter)__Pyx_CyFunction_get_qualname, (setter)__Pyx_CyFunction_set_qualname, 0, 0}, - {(char *) "func_dict", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0}, - {(char *) "__dict__", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0}, - {(char *) "func_globals", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, - {(char *) "__globals__", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, - {(char *) "func_closure", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, - {(char *) "__closure__", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, - {(char *) "func_code", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, - {(char *) "__code__", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, - {(char *) "func_defaults", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, - {(char *) "__defaults__", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, - {(char *) "__kwdefaults__", (getter)__Pyx_CyFunction_get_kwdefaults, (setter)__Pyx_CyFunction_set_kwdefaults, 0, 0}, - {(char *) "__annotations__", (getter)__Pyx_CyFunction_get_annotations, (setter)__Pyx_CyFunction_set_annotations, 0, 0}, - {(char *) "_is_coroutine", (getter)__Pyx_CyFunction_get_is_coroutine, 0, 0, 0}, -#if CYTHON_COMPILING_IN_LIMITED_API - {"__module__", (getter)__Pyx_CyFunction_get_module, (setter)__Pyx_CyFunction_set_module, 0, 0}, -#endif - {0, 0, 0, 0, 0} -}; -static PyMemberDef __pyx_CyFunction_members[] = { -#if !CYTHON_COMPILING_IN_LIMITED_API - {(char *) "__module__", T_OBJECT, offsetof(PyCFunctionObject, m_module), 0, 0}, -#endif -#if CYTHON_USE_TYPE_SPECS - {(char *) "__dictoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_dict), READONLY, 0}, -#if CYTHON_METH_FASTCALL -#if CYTHON_BACKPORT_VECTORCALL - {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_vectorcall), READONLY, 0}, -#else -#if !CYTHON_COMPILING_IN_LIMITED_API - {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(PyCFunctionObject, vectorcall), READONLY, 0}, -#endif -#endif -#endif -#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API - {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_weakreflist), READONLY, 0}, -#else - {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(PyCFunctionObject, m_weakreflist), READONLY, 0}, -#endif -#endif - {0, 0, 0, 0, 0} -}; -static PyObject * -__Pyx_CyFunction_reduce(__pyx_CyFunctionObject *m, PyObject *args) -{ - CYTHON_UNUSED_VAR(args); -#if PY_MAJOR_VERSION >= 3 - Py_INCREF(m->func_qualname); - return m->func_qualname; -#else - return PyString_FromString(((PyCFunctionObject*)m)->m_ml->ml_name); -#endif -} -static PyMethodDef __pyx_CyFunction_methods[] = { - {"__reduce__", (PyCFunction)__Pyx_CyFunction_reduce, METH_VARARGS, 0}, - {0, 0, 0, 0} -}; -#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API -#define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func_weakreflist) -#else -#define __Pyx_CyFunction_weakreflist(cyfunc) (((PyCFunctionObject*)cyfunc)->m_weakreflist) -#endif -static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject *op, PyMethodDef *ml, int flags, PyObject* qualname, - PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { -#if !CYTHON_COMPILING_IN_LIMITED_API - PyCFunctionObject *cf = (PyCFunctionObject*) op; -#endif - if (unlikely(op == NULL)) - return NULL; -#if CYTHON_COMPILING_IN_LIMITED_API - op->func = PyCFunction_NewEx(ml, (PyObject*)op, module); - if (unlikely(!op->func)) return NULL; -#endif - op->flags = flags; - __Pyx_CyFunction_weakreflist(op) = NULL; -#if !CYTHON_COMPILING_IN_LIMITED_API - cf->m_ml = ml; - cf->m_self = (PyObject *) op; -#endif - Py_XINCREF(closure); - op->func_closure = closure; -#if !CYTHON_COMPILING_IN_LIMITED_API - Py_XINCREF(module); - cf->m_module = module; -#endif - op->func_dict = NULL; - op->func_name = NULL; - Py_INCREF(qualname); - op->func_qualname = qualname; - op->func_doc = NULL; -#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API - op->func_classobj = NULL; -#else - ((PyCMethodObject*)op)->mm_class = NULL; -#endif - op->func_globals = globals; - Py_INCREF(op->func_globals); - Py_XINCREF(code); - op->func_code = code; - op->defaults_pyobjects = 0; - op->defaults_size = 0; - op->defaults = NULL; - op->defaults_tuple = NULL; - op->defaults_kwdict = NULL; - op->defaults_getter = NULL; - op->func_annotations = NULL; - op->func_is_coroutine = NULL; -#if CYTHON_METH_FASTCALL - switch (ml->ml_flags & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { - case METH_NOARGS: - __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_NOARGS; - break; - case METH_O: - __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_O; - break; - case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: - __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD; - break; - case METH_FASTCALL | METH_KEYWORDS: - __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS; - break; - case METH_VARARGS | METH_KEYWORDS: - __Pyx_CyFunction_func_vectorcall(op) = NULL; - break; - default: - PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); - Py_DECREF(op); - return NULL; - } -#endif - return (PyObject *) op; -} -static int -__Pyx_CyFunction_clear(__pyx_CyFunctionObject *m) -{ - Py_CLEAR(m->func_closure); -#if CYTHON_COMPILING_IN_LIMITED_API - Py_CLEAR(m->func); -#else - Py_CLEAR(((PyCFunctionObject*)m)->m_module); -#endif - Py_CLEAR(m->func_dict); - Py_CLEAR(m->func_name); - Py_CLEAR(m->func_qualname); - Py_CLEAR(m->func_doc); - Py_CLEAR(m->func_globals); - Py_CLEAR(m->func_code); -#if !CYTHON_COMPILING_IN_LIMITED_API -#if PY_VERSION_HEX < 0x030900B1 - Py_CLEAR(__Pyx_CyFunction_GetClassObj(m)); -#else - { - PyObject *cls = (PyObject*) ((PyCMethodObject *) (m))->mm_class; - ((PyCMethodObject *) (m))->mm_class = NULL; - Py_XDECREF(cls); - } -#endif -#endif - Py_CLEAR(m->defaults_tuple); - Py_CLEAR(m->defaults_kwdict); - Py_CLEAR(m->func_annotations); - Py_CLEAR(m->func_is_coroutine); - if (m->defaults) { - PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m); - int i; - for (i = 0; i < m->defaults_pyobjects; i++) - Py_XDECREF(pydefaults[i]); - PyObject_Free(m->defaults); - m->defaults = NULL; - } - return 0; -} -static void __Pyx__CyFunction_dealloc(__pyx_CyFunctionObject *m) -{ - if (__Pyx_CyFunction_weakreflist(m) != NULL) - PyObject_ClearWeakRefs((PyObject *) m); - __Pyx_CyFunction_clear(m); - __Pyx_PyHeapTypeObject_GC_Del(m); -} -static void __Pyx_CyFunction_dealloc(__pyx_CyFunctionObject *m) -{ - PyObject_GC_UnTrack(m); - __Pyx__CyFunction_dealloc(m); -} -static int __Pyx_CyFunction_traverse(__pyx_CyFunctionObject *m, visitproc visit, void *arg) -{ - Py_VISIT(m->func_closure); -#if CYTHON_COMPILING_IN_LIMITED_API - Py_VISIT(m->func); -#else - Py_VISIT(((PyCFunctionObject*)m)->m_module); -#endif - Py_VISIT(m->func_dict); - Py_VISIT(m->func_name); - Py_VISIT(m->func_qualname); - Py_VISIT(m->func_doc); - Py_VISIT(m->func_globals); - Py_VISIT(m->func_code); -#if !CYTHON_COMPILING_IN_LIMITED_API - Py_VISIT(__Pyx_CyFunction_GetClassObj(m)); -#endif - Py_VISIT(m->defaults_tuple); - Py_VISIT(m->defaults_kwdict); - Py_VISIT(m->func_is_coroutine); - if (m->defaults) { - PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m); - int i; - for (i = 0; i < m->defaults_pyobjects; i++) - Py_VISIT(pydefaults[i]); - } - return 0; -} -static PyObject* -__Pyx_CyFunction_repr(__pyx_CyFunctionObject *op) -{ -#if PY_MAJOR_VERSION >= 3 - return PyUnicode_FromFormat("", - op->func_qualname, (void *)op); -#else - return PyString_FromFormat("", - PyString_AsString(op->func_qualname), (void *)op); -#endif -} -static PyObject * __Pyx_CyFunction_CallMethod(PyObject *func, PyObject *self, PyObject *arg, PyObject *kw) { -#if CYTHON_COMPILING_IN_LIMITED_API - PyObject *f = ((__pyx_CyFunctionObject*)func)->func; - PyObject *py_name = NULL; - PyCFunction meth; - int flags; - meth = PyCFunction_GetFunction(f); - if (unlikely(!meth)) return NULL; - flags = PyCFunction_GetFlags(f); - if (unlikely(flags < 0)) return NULL; -#else - PyCFunctionObject* f = (PyCFunctionObject*)func; - PyCFunction meth = f->m_ml->ml_meth; - int flags = f->m_ml->ml_flags; -#endif - Py_ssize_t size; - switch (flags & (METH_VARARGS | METH_KEYWORDS | METH_NOARGS | METH_O)) { - case METH_VARARGS: - if (likely(kw == NULL || PyDict_Size(kw) == 0)) - return (*meth)(self, arg); - break; - case METH_VARARGS | METH_KEYWORDS: - return (*(PyCFunctionWithKeywords)(void*)meth)(self, arg, kw); - case METH_NOARGS: - if (likely(kw == NULL || PyDict_Size(kw) == 0)) { -#if CYTHON_ASSUME_SAFE_MACROS - size = PyTuple_GET_SIZE(arg); -#else - size = PyTuple_Size(arg); - if (unlikely(size < 0)) return NULL; -#endif - if (likely(size == 0)) - return (*meth)(self, NULL); -#if CYTHON_COMPILING_IN_LIMITED_API - py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); - if (!py_name) return NULL; - PyErr_Format(PyExc_TypeError, - "%.200S() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", - py_name, size); - Py_DECREF(py_name); -#else - PyErr_Format(PyExc_TypeError, - "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", - f->m_ml->ml_name, size); -#endif - return NULL; - } - break; - case METH_O: - if (likely(kw == NULL || PyDict_Size(kw) == 0)) { -#if CYTHON_ASSUME_SAFE_MACROS - size = PyTuple_GET_SIZE(arg); -#else - size = PyTuple_Size(arg); - if (unlikely(size < 0)) return NULL; -#endif - if (likely(size == 1)) { - PyObject *result, *arg0; - #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - arg0 = PyTuple_GET_ITEM(arg, 0); - #else - arg0 = __Pyx_PySequence_ITEM(arg, 0); if (unlikely(!arg0)) return NULL; - #endif - result = (*meth)(self, arg0); - #if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS) - Py_DECREF(arg0); - #endif - return result; - } -#if CYTHON_COMPILING_IN_LIMITED_API - py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); - if (!py_name) return NULL; - PyErr_Format(PyExc_TypeError, - "%.200S() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", - py_name, size); - Py_DECREF(py_name); -#else - PyErr_Format(PyExc_TypeError, - "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", - f->m_ml->ml_name, size); -#endif - return NULL; - } - break; - default: - PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); - return NULL; - } -#if CYTHON_COMPILING_IN_LIMITED_API - py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); - if (!py_name) return NULL; - PyErr_Format(PyExc_TypeError, "%.200S() takes no keyword arguments", - py_name); - Py_DECREF(py_name); -#else - PyErr_Format(PyExc_TypeError, "%.200s() takes no keyword arguments", - f->m_ml->ml_name); -#endif - return NULL; -} -static CYTHON_INLINE PyObject *__Pyx_CyFunction_Call(PyObject *func, PyObject *arg, PyObject *kw) { - PyObject *self, *result; -#if CYTHON_COMPILING_IN_LIMITED_API - self = PyCFunction_GetSelf(((__pyx_CyFunctionObject*)func)->func); - if (unlikely(!self) && PyErr_Occurred()) return NULL; -#else - self = ((PyCFunctionObject*)func)->m_self; -#endif - result = __Pyx_CyFunction_CallMethod(func, self, arg, kw); - return result; -} -static PyObject *__Pyx_CyFunction_CallAsMethod(PyObject *func, PyObject *args, PyObject *kw) { - PyObject *result; - __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *) func; -#if CYTHON_METH_FASTCALL - __pyx_vectorcallfunc vc = __Pyx_CyFunction_func_vectorcall(cyfunc); - if (vc) { -#if CYTHON_ASSUME_SAFE_MACROS - return __Pyx_PyVectorcall_FastCallDict(func, vc, &PyTuple_GET_ITEM(args, 0), (size_t)PyTuple_GET_SIZE(args), kw); -#else - (void) &__Pyx_PyVectorcall_FastCallDict; - return PyVectorcall_Call(func, args, kw); -#endif - } -#endif - if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { - Py_ssize_t argc; - PyObject *new_args; - PyObject *self; -#if CYTHON_ASSUME_SAFE_MACROS - argc = PyTuple_GET_SIZE(args); -#else - argc = PyTuple_Size(args); - if (unlikely(!argc) < 0) return NULL; -#endif - new_args = PyTuple_GetSlice(args, 1, argc); - if (unlikely(!new_args)) - return NULL; - self = PyTuple_GetItem(args, 0); - if (unlikely(!self)) { - Py_DECREF(new_args); -#if PY_MAJOR_VERSION > 2 - PyErr_Format(PyExc_TypeError, - "unbound method %.200S() needs an argument", - cyfunc->func_qualname); -#else - PyErr_SetString(PyExc_TypeError, - "unbound method needs an argument"); -#endif - return NULL; - } - result = __Pyx_CyFunction_CallMethod(func, self, new_args, kw); - Py_DECREF(new_args); - } else { - result = __Pyx_CyFunction_Call(func, args, kw); - } - return result; -} -#if CYTHON_METH_FASTCALL -static CYTHON_INLINE int __Pyx_CyFunction_Vectorcall_CheckArgs(__pyx_CyFunctionObject *cyfunc, Py_ssize_t nargs, PyObject *kwnames) -{ - int ret = 0; - if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { - if (unlikely(nargs < 1)) { - PyErr_Format(PyExc_TypeError, "%.200s() needs an argument", - ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); - return -1; - } - ret = 1; - } - if (unlikely(kwnames) && unlikely(PyTuple_GET_SIZE(kwnames))) { - PyErr_Format(PyExc_TypeError, - "%.200s() takes no keyword arguments", ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); - return -1; - } - return ret; -} -static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) -{ - __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; - PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; -#if CYTHON_BACKPORT_VECTORCALL - Py_ssize_t nargs = (Py_ssize_t)nargsf; -#else - Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); -#endif - PyObject *self; - switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { - case 1: - self = args[0]; - args += 1; - nargs -= 1; - break; - case 0: - self = ((PyCFunctionObject*)cyfunc)->m_self; - break; - default: - return NULL; - } - if (unlikely(nargs != 0)) { - PyErr_Format(PyExc_TypeError, - "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", - def->ml_name, nargs); - return NULL; - } - return def->ml_meth(self, NULL); -} -static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) -{ - __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; - PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; -#if CYTHON_BACKPORT_VECTORCALL - Py_ssize_t nargs = (Py_ssize_t)nargsf; -#else - Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); -#endif - PyObject *self; - switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { - case 1: - self = args[0]; - args += 1; - nargs -= 1; - break; - case 0: - self = ((PyCFunctionObject*)cyfunc)->m_self; - break; - default: - return NULL; - } - if (unlikely(nargs != 1)) { - PyErr_Format(PyExc_TypeError, - "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", - def->ml_name, nargs); - return NULL; - } - return def->ml_meth(self, args[0]); -} -static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) -{ - __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; - PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; -#if CYTHON_BACKPORT_VECTORCALL - Py_ssize_t nargs = (Py_ssize_t)nargsf; -#else - Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); -#endif - PyObject *self; - switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { - case 1: - self = args[0]; - args += 1; - nargs -= 1; - break; - case 0: - self = ((PyCFunctionObject*)cyfunc)->m_self; - break; - default: - return NULL; - } - return ((_PyCFunctionFastWithKeywords)(void(*)(void))def->ml_meth)(self, args, nargs, kwnames); -} -static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) -{ - __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; - PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; - PyTypeObject *cls = (PyTypeObject *) __Pyx_CyFunction_GetClassObj(cyfunc); -#if CYTHON_BACKPORT_VECTORCALL - Py_ssize_t nargs = (Py_ssize_t)nargsf; -#else - Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); -#endif - PyObject *self; - switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { - case 1: - self = args[0]; - args += 1; - nargs -= 1; - break; - case 0: - self = ((PyCFunctionObject*)cyfunc)->m_self; - break; - default: - return NULL; - } - return ((__Pyx_PyCMethod)(void(*)(void))def->ml_meth)(self, cls, args, (size_t)nargs, kwnames); -} -#endif -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_CyFunctionType_slots[] = { - {Py_tp_dealloc, (void *)__Pyx_CyFunction_dealloc}, - {Py_tp_repr, (void *)__Pyx_CyFunction_repr}, - {Py_tp_call, (void *)__Pyx_CyFunction_CallAsMethod}, - {Py_tp_traverse, (void *)__Pyx_CyFunction_traverse}, - {Py_tp_clear, (void *)__Pyx_CyFunction_clear}, - {Py_tp_methods, (void *)__pyx_CyFunction_methods}, - {Py_tp_members, (void *)__pyx_CyFunction_members}, - {Py_tp_getset, (void *)__pyx_CyFunction_getsets}, - {Py_tp_descr_get, (void *)__Pyx_PyMethod_New}, - {0, 0}, -}; -static PyType_Spec __pyx_CyFunctionType_spec = { - __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", - sizeof(__pyx_CyFunctionObject), - 0, -#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR - Py_TPFLAGS_METHOD_DESCRIPTOR | -#endif -#if (defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL) - _Py_TPFLAGS_HAVE_VECTORCALL | -#endif - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, - __pyx_CyFunctionType_slots -}; -#else -static PyTypeObject __pyx_CyFunctionType_type = { - PyVarObject_HEAD_INIT(0, 0) - __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", - sizeof(__pyx_CyFunctionObject), - 0, - (destructor) __Pyx_CyFunction_dealloc, -#if !CYTHON_METH_FASTCALL - 0, -#elif CYTHON_BACKPORT_VECTORCALL - (printfunc)offsetof(__pyx_CyFunctionObject, func_vectorcall), -#else - offsetof(PyCFunctionObject, vectorcall), -#endif - 0, - 0, -#if PY_MAJOR_VERSION < 3 - 0, -#else - 0, -#endif - (reprfunc) __Pyx_CyFunction_repr, - 0, - 0, - 0, - 0, - __Pyx_CyFunction_CallAsMethod, - 0, - 0, - 0, - 0, -#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR - Py_TPFLAGS_METHOD_DESCRIPTOR | -#endif -#if defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL - _Py_TPFLAGS_HAVE_VECTORCALL | -#endif - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, - 0, - (traverseproc) __Pyx_CyFunction_traverse, - (inquiry) __Pyx_CyFunction_clear, - 0, -#if PY_VERSION_HEX < 0x030500A0 - offsetof(__pyx_CyFunctionObject, func_weakreflist), -#else - offsetof(PyCFunctionObject, m_weakreflist), -#endif - 0, - 0, - __pyx_CyFunction_methods, - __pyx_CyFunction_members, - __pyx_CyFunction_getsets, - 0, - 0, - __Pyx_PyMethod_New, - 0, - offsetof(__pyx_CyFunctionObject, func_dict), - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, -#if PY_VERSION_HEX >= 0x030400a1 - 0, -#endif -#if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, -#endif -#if __PYX_NEED_TP_PRINT_SLOT - 0, -#endif -#if PY_VERSION_HEX >= 0x030C0000 - 0, -#endif -#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, -#endif -}; -#endif -static int __pyx_CyFunction_init(PyObject *module) { -#if CYTHON_USE_TYPE_SPECS - __pyx_CyFunctionType = __Pyx_FetchCommonTypeFromSpec(module, &__pyx_CyFunctionType_spec, NULL); -#else - CYTHON_UNUSED_VAR(module); - __pyx_CyFunctionType = __Pyx_FetchCommonType(&__pyx_CyFunctionType_type); -#endif - if (unlikely(__pyx_CyFunctionType == NULL)) { - return -1; - } - return 0; -} -static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *func, size_t size, int pyobjects) { - __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; - m->defaults = PyObject_Malloc(size); - if (unlikely(!m->defaults)) - return PyErr_NoMemory(); - memset(m->defaults, 0, size); - m->defaults_pyobjects = pyobjects; - m->defaults_size = size; - return m->defaults; -} -static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *func, PyObject *tuple) { - __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; - m->defaults_tuple = tuple; - Py_INCREF(tuple); -} -static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *func, PyObject *dict) { - __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; - m->defaults_kwdict = dict; - Py_INCREF(dict); -} -static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *func, PyObject *dict) { - __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; - m->func_annotations = dict; - Py_INCREF(dict); -} - -/* CythonFunction */ -static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, int flags, PyObject* qualname, - PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { - PyObject *op = __Pyx_CyFunction_Init( - PyObject_GC_New(__pyx_CyFunctionObject, __pyx_CyFunctionType), - ml, flags, qualname, closure, module, globals, code - ); - if (likely(op)) { - PyObject_GC_Track(op); - } - return op; -} - -/* GetNameInClass */ -static PyObject *__Pyx__GetNameInClass(PyObject *nmspace, PyObject *name) { - PyObject *result; - PyObject *dict; - assert(PyType_Check(nmspace)); -#if CYTHON_USE_TYPE_SLOTS - dict = ((PyTypeObject*)nmspace)->tp_dict; - Py_XINCREF(dict); -#else - dict = PyObject_GetAttr(nmspace, __pyx_n_s_dict); -#endif - if (likely(dict)) { - result = PyObject_GetItem(dict, name); - Py_DECREF(dict); - if (result) { - return result; - } - } - PyErr_Clear(); - __Pyx_GetModuleGlobalNameUncached(result, name); - return result; -} - -/* CLineInTraceback */ -#ifndef CYTHON_CLINE_IN_TRACEBACK -static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line) { - PyObject *use_cline; - PyObject *ptype, *pvalue, *ptraceback; -#if CYTHON_COMPILING_IN_CPYTHON - PyObject **cython_runtime_dict; -#endif - CYTHON_MAYBE_UNUSED_VAR(tstate); - if (unlikely(!__pyx_cython_runtime)) { - return c_line; - } - __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); -#if CYTHON_COMPILING_IN_CPYTHON - cython_runtime_dict = _PyObject_GetDictPtr(__pyx_cython_runtime); - if (likely(cython_runtime_dict)) { - __PYX_PY_DICT_LOOKUP_IF_MODIFIED( - use_cline, *cython_runtime_dict, - __Pyx_PyDict_GetItemStr(*cython_runtime_dict, __pyx_n_s_cline_in_traceback)) - } else -#endif - { - PyObject *use_cline_obj = __Pyx_PyObject_GetAttrStrNoError(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback); - if (use_cline_obj) { - use_cline = PyObject_Not(use_cline_obj) ? Py_False : Py_True; - Py_DECREF(use_cline_obj); - } else { - PyErr_Clear(); - use_cline = NULL; - } - } - if (!use_cline) { - c_line = 0; - (void) PyObject_SetAttr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback, Py_False); - } - else if (use_cline == Py_False || (use_cline != Py_True && PyObject_Not(use_cline) != 0)) { - c_line = 0; - } - __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); - return c_line; -} -#endif - -/* CodeObjectCache */ -#if !CYTHON_COMPILING_IN_LIMITED_API -static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { - int start = 0, mid = 0, end = count - 1; - if (end >= 0 && code_line > entries[end].code_line) { - return count; - } - while (start < end) { - mid = start + (end - start) / 2; - if (code_line < entries[mid].code_line) { - end = mid; - } else if (code_line > entries[mid].code_line) { - start = mid + 1; - } else { - return mid; - } - } - if (code_line <= entries[mid].code_line) { - return mid; - } else { - return mid + 1; - } -} -static PyCodeObject *__pyx_find_code_object(int code_line) { - PyCodeObject* code_object; - int pos; - if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { - return NULL; - } - pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); - if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { - return NULL; - } - code_object = __pyx_code_cache.entries[pos].code_object; - Py_INCREF(code_object); - return code_object; -} -static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { - int pos, i; - __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; - if (unlikely(!code_line)) { - return; - } - if (unlikely(!entries)) { - entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); - if (likely(entries)) { - __pyx_code_cache.entries = entries; - __pyx_code_cache.max_count = 64; - __pyx_code_cache.count = 1; - entries[0].code_line = code_line; - entries[0].code_object = code_object; - Py_INCREF(code_object); - } - return; - } - pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); - if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { - PyCodeObject* tmp = entries[pos].code_object; - entries[pos].code_object = code_object; - Py_DECREF(tmp); - return; - } - if (__pyx_code_cache.count == __pyx_code_cache.max_count) { - int new_max = __pyx_code_cache.max_count + 64; - entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( - __pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry)); - if (unlikely(!entries)) { - return; - } - __pyx_code_cache.entries = entries; - __pyx_code_cache.max_count = new_max; - } - for (i=__pyx_code_cache.count; i>pos; i--) { - entries[i] = entries[i-1]; - } - entries[pos].code_line = code_line; - entries[pos].code_object = code_object; - __pyx_code_cache.count++; - Py_INCREF(code_object); -} -#endif - -/* AddTraceback */ -#include "compile.h" -#include "frameobject.h" -#include "traceback.h" -#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API - #ifndef Py_BUILD_CORE - #define Py_BUILD_CORE 1 - #endif - #include "internal/pycore_frame.h" -#endif -#if CYTHON_COMPILING_IN_LIMITED_API -static PyObject *__Pyx_PyCode_Replace_For_AddTraceback(PyObject *code, PyObject *scratch_dict, - PyObject *firstlineno, PyObject *name) { - PyObject *replace = NULL; - if (unlikely(PyDict_SetItemString(scratch_dict, "co_firstlineno", firstlineno))) return NULL; - if (unlikely(PyDict_SetItemString(scratch_dict, "co_name", name))) return NULL; - replace = PyObject_GetAttrString(code, "replace"); - if (likely(replace)) { - PyObject *result; - result = PyObject_Call(replace, __pyx_empty_tuple, scratch_dict); - Py_DECREF(replace); - return result; - } - #if __PYX_LIMITED_VERSION_HEX < 0x030780000 - PyErr_Clear(); - { - PyObject *compiled = NULL, *result = NULL; - if (unlikely(PyDict_SetItemString(scratch_dict, "code", code))) return NULL; - if (unlikely(PyDict_SetItemString(scratch_dict, "type", (PyObject*)(&PyType_Type)))) return NULL; - compiled = Py_CompileString( - "out = type(code)(\n" - " code.co_argcount, code.co_kwonlyargcount, code.co_nlocals, code.co_stacksize,\n" - " code.co_flags, code.co_code, code.co_consts, code.co_names,\n" - " code.co_varnames, code.co_filename, co_name, co_firstlineno,\n" - " code.co_lnotab)\n", "", Py_file_input); - if (!compiled) return NULL; - result = PyEval_EvalCode(compiled, scratch_dict, scratch_dict); - Py_DECREF(compiled); - if (!result) PyErr_Print(); - Py_DECREF(result); - result = PyDict_GetItemString(scratch_dict, "out"); - if (result) Py_INCREF(result); - return result; - } - #endif -} -static void __Pyx_AddTraceback(const char *funcname, int c_line, - int py_line, const char *filename) { - PyObject *code_object = NULL, *py_py_line = NULL, *py_funcname = NULL, *dict = NULL; - PyObject *replace = NULL, *getframe = NULL, *frame = NULL; - PyObject *exc_type, *exc_value, *exc_traceback; - int success = 0; - if (c_line) { - (void) __pyx_cfilenm; - (void) __Pyx_CLineForTraceback(__Pyx_PyThreadState_Current, c_line); - } - PyErr_Fetch(&exc_type, &exc_value, &exc_traceback); - code_object = Py_CompileString("_getframe()", filename, Py_eval_input); - if (unlikely(!code_object)) goto bad; - py_py_line = PyLong_FromLong(py_line); - if (unlikely(!py_py_line)) goto bad; - py_funcname = PyUnicode_FromString(funcname); - if (unlikely(!py_funcname)) goto bad; - dict = PyDict_New(); - if (unlikely(!dict)) goto bad; - { - PyObject *old_code_object = code_object; - code_object = __Pyx_PyCode_Replace_For_AddTraceback(code_object, dict, py_py_line, py_funcname); - Py_DECREF(old_code_object); - } - if (unlikely(!code_object)) goto bad; - getframe = PySys_GetObject("_getframe"); - if (unlikely(!getframe)) goto bad; - if (unlikely(PyDict_SetItemString(dict, "_getframe", getframe))) goto bad; - frame = PyEval_EvalCode(code_object, dict, dict); - if (unlikely(!frame) || frame == Py_None) goto bad; - success = 1; - bad: - PyErr_Restore(exc_type, exc_value, exc_traceback); - Py_XDECREF(code_object); - Py_XDECREF(py_py_line); - Py_XDECREF(py_funcname); - Py_XDECREF(dict); - Py_XDECREF(replace); - if (success) { - PyTraceBack_Here( - (struct _frame*)frame); - } - Py_XDECREF(frame); -} -#else -static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( - const char *funcname, int c_line, - int py_line, const char *filename) { - PyCodeObject *py_code = NULL; - PyObject *py_funcname = NULL; - #if PY_MAJOR_VERSION < 3 - PyObject *py_srcfile = NULL; - py_srcfile = PyString_FromString(filename); - if (!py_srcfile) goto bad; - #endif - if (c_line) { - #if PY_MAJOR_VERSION < 3 - py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); - if (!py_funcname) goto bad; - #else - py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); - if (!py_funcname) goto bad; - funcname = PyUnicode_AsUTF8(py_funcname); - if (!funcname) goto bad; - #endif - } - else { - #if PY_MAJOR_VERSION < 3 - py_funcname = PyString_FromString(funcname); - if (!py_funcname) goto bad; - #endif - } - #if PY_MAJOR_VERSION < 3 - py_code = __Pyx_PyCode_New( - 0, - 0, - 0, - 0, - 0, - 0, - __pyx_empty_bytes, /*PyObject *code,*/ - __pyx_empty_tuple, /*PyObject *consts,*/ - __pyx_empty_tuple, /*PyObject *names,*/ - __pyx_empty_tuple, /*PyObject *varnames,*/ - __pyx_empty_tuple, /*PyObject *freevars,*/ - __pyx_empty_tuple, /*PyObject *cellvars,*/ - py_srcfile, /*PyObject *filename,*/ - py_funcname, /*PyObject *name,*/ - py_line, - __pyx_empty_bytes /*PyObject *lnotab*/ - ); - Py_DECREF(py_srcfile); - #else - py_code = PyCode_NewEmpty(filename, funcname, py_line); - #endif - Py_XDECREF(py_funcname); // XDECREF since it's only set on Py3 if cline - return py_code; -bad: - Py_XDECREF(py_funcname); - #if PY_MAJOR_VERSION < 3 - Py_XDECREF(py_srcfile); - #endif - return NULL; -} -static void __Pyx_AddTraceback(const char *funcname, int c_line, - int py_line, const char *filename) { - PyCodeObject *py_code = 0; - PyFrameObject *py_frame = 0; - PyThreadState *tstate = __Pyx_PyThreadState_Current; - PyObject *ptype, *pvalue, *ptraceback; - if (c_line) { - c_line = __Pyx_CLineForTraceback(tstate, c_line); - } - py_code = __pyx_find_code_object(c_line ? -c_line : py_line); - if (!py_code) { - __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); - py_code = __Pyx_CreateCodeObjectForTraceback( - funcname, c_line, py_line, filename); - if (!py_code) { - /* If the code object creation fails, then we should clear the - fetched exception references and propagate the new exception */ - Py_XDECREF(ptype); - Py_XDECREF(pvalue); - Py_XDECREF(ptraceback); - goto bad; - } - __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); - __pyx_insert_code_object(c_line ? -c_line : py_line, py_code); - } - py_frame = PyFrame_New( - tstate, /*PyThreadState *tstate,*/ - py_code, /*PyCodeObject *code,*/ - __pyx_d, /*PyObject *globals,*/ - 0 /*PyObject *locals*/ - ); - if (!py_frame) goto bad; - __Pyx_PyFrame_SetLineNumber(py_frame, py_line); - PyTraceBack_Here(py_frame); -bad: - Py_XDECREF(py_code); - Py_XDECREF(py_frame); -} -#endif - -#if PY_MAJOR_VERSION < 3 -static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags) { - __Pyx_TypeName obj_type_name; - if (PyObject_CheckBuffer(obj)) return PyObject_GetBuffer(obj, view, flags); - if (__Pyx_TypeCheck(obj, __pyx_ptype_9csimdjson_ArrayBuffer)) return __pyx_pw_9csimdjson_11ArrayBuffer_5__getbuffer__(obj, view, flags); - if (__Pyx_TypeCheck(obj, __pyx_array_type)) return __pyx_array_getbuffer(obj, view, flags); - if (__Pyx_TypeCheck(obj, __pyx_memoryview_type)) return __pyx_memoryview_getbuffer(obj, view, flags); - obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); - PyErr_Format(PyExc_TypeError, - "'" __Pyx_FMT_TYPENAME "' does not have the buffer interface", - obj_type_name); - __Pyx_DECREF_TypeName(obj_type_name); - return -1; -} -static void __Pyx_ReleaseBuffer(Py_buffer *view) { - PyObject *obj = view->obj; - if (!obj) return; - if (PyObject_CheckBuffer(obj)) { - PyBuffer_Release(view); - return; - } - if ((0)) {} - else if (__Pyx_TypeCheck(obj, __pyx_ptype_9csimdjson_ArrayBuffer)) __pyx_pw_9csimdjson_11ArrayBuffer_7__releasebuffer__(obj, view); - view->obj = NULL; - Py_DECREF(obj); -} -#endif - - -/* MemviewSliceIsContig */ -static int -__pyx_memviewslice_is_contig(const __Pyx_memviewslice mvs, char order, int ndim) -{ - int i, index, step, start; - Py_ssize_t itemsize = mvs.memview->view.itemsize; - if (order == 'F') { - step = 1; - start = 0; - } else { - step = -1; - start = ndim - 1; - } - for (i = 0; i < ndim; i++) { - index = start + step * i; - if (mvs.suboffsets[index] >= 0 || mvs.strides[index] != itemsize) - return 0; - itemsize *= mvs.shape[index]; - } - return 1; -} - -/* OverlappingSlices */ -static void -__pyx_get_array_memory_extents(__Pyx_memviewslice *slice, - void **out_start, void **out_end, - int ndim, size_t itemsize) -{ - char *start, *end; - int i; - start = end = slice->data; - for (i = 0; i < ndim; i++) { - Py_ssize_t stride = slice->strides[i]; - Py_ssize_t extent = slice->shape[i]; - if (extent == 0) { - *out_start = *out_end = start; - return; - } else { - if (stride > 0) - end += stride * (extent - 1); - else - start += stride * (extent - 1); - } - } - *out_start = start; - *out_end = end + itemsize; -} -static int -__pyx_slices_overlap(__Pyx_memviewslice *slice1, - __Pyx_memviewslice *slice2, - int ndim, size_t itemsize) -{ - void *start1, *end1, *start2, *end2; - __pyx_get_array_memory_extents(slice1, &start1, &end1, ndim, itemsize); - __pyx_get_array_memory_extents(slice2, &start2, &end2, ndim, itemsize); - return (start1 < end2) && (start2 < end1); -} - -/* CIntFromPyVerify */ -#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ - __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) -#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ - __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) -#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ - {\ - func_type value = func_value;\ - if (sizeof(target_type) < sizeof(func_type)) {\ - if (unlikely(value != (func_type) (target_type) value)) {\ - func_type zero = 0;\ - if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\ - return (target_type) -1;\ - if (is_unsigned && unlikely(value < zero))\ - goto raise_neg_overflow;\ - else\ - goto raise_overflow;\ - }\ - }\ - return (target_type) value;\ - } - -/* IsLittleEndian */ -static CYTHON_INLINE int __Pyx_Is_Little_Endian(void) -{ - union { - uint32_t u32; - uint8_t u8[4]; - } S; - S.u32 = 0x01020304; - return S.u8[0] == 4; -} - -/* BufferFormatCheck */ -static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx, - __Pyx_BufFmt_StackElem* stack, - __Pyx_TypeInfo* type) { - stack[0].field = &ctx->root; - stack[0].parent_offset = 0; - ctx->root.type = type; - ctx->root.name = "buffer dtype"; - ctx->root.offset = 0; - ctx->head = stack; - ctx->head->field = &ctx->root; - ctx->fmt_offset = 0; - ctx->head->parent_offset = 0; - ctx->new_packmode = '@'; - ctx->enc_packmode = '@'; - ctx->new_count = 1; - ctx->enc_count = 0; - ctx->enc_type = 0; - ctx->is_complex = 0; - ctx->is_valid_array = 0; - ctx->struct_alignment = 0; - while (type->typegroup == 'S') { - ++ctx->head; - ctx->head->field = type->fields; - ctx->head->parent_offset = 0; - type = type->fields->type; - } -} -static int __Pyx_BufFmt_ParseNumber(const char** ts) { - int count; - const char* t = *ts; - if (*t < '0' || *t > '9') { - return -1; - } else { - count = *t++ - '0'; - while (*t >= '0' && *t <= '9') { - count *= 10; - count += *t++ - '0'; - } - } - *ts = t; - return count; -} -static int __Pyx_BufFmt_ExpectNumber(const char **ts) { - int number = __Pyx_BufFmt_ParseNumber(ts); - if (number == -1) - PyErr_Format(PyExc_ValueError,\ - "Does not understand character buffer dtype format string ('%c')", **ts); - return number; -} -static void __Pyx_BufFmt_RaiseUnexpectedChar(char ch) { - PyErr_Format(PyExc_ValueError, - "Unexpected format string character: '%c'", ch); -} -static const char* __Pyx_BufFmt_DescribeTypeChar(char ch, int is_complex) { - switch (ch) { - case '?': return "'bool'"; - case 'c': return "'char'"; - case 'b': return "'signed char'"; - case 'B': return "'unsigned char'"; - case 'h': return "'short'"; - case 'H': return "'unsigned short'"; - case 'i': return "'int'"; - case 'I': return "'unsigned int'"; - case 'l': return "'long'"; - case 'L': return "'unsigned long'"; - case 'q': return "'long long'"; - case 'Q': return "'unsigned long long'"; - case 'f': return (is_complex ? "'complex float'" : "'float'"); - case 'd': return (is_complex ? "'complex double'" : "'double'"); - case 'g': return (is_complex ? "'complex long double'" : "'long double'"); - case 'T': return "a struct"; - case 'O': return "Python object"; - case 'P': return "a pointer"; - case 's': case 'p': return "a string"; - case 0: return "end"; - default: return "unparsable format string"; - } -} -static size_t __Pyx_BufFmt_TypeCharToStandardSize(char ch, int is_complex) { - switch (ch) { - case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; - case 'h': case 'H': return 2; - case 'i': case 'I': case 'l': case 'L': return 4; - case 'q': case 'Q': return 8; - case 'f': return (is_complex ? 8 : 4); - case 'd': return (is_complex ? 16 : 8); - case 'g': { - PyErr_SetString(PyExc_ValueError, "Python does not define a standard format string size for long double ('g').."); - return 0; - } - case 'O': case 'P': return sizeof(void*); - default: - __Pyx_BufFmt_RaiseUnexpectedChar(ch); - return 0; - } -} -static size_t __Pyx_BufFmt_TypeCharToNativeSize(char ch, int is_complex) { - switch (ch) { - case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; - case 'h': case 'H': return sizeof(short); - case 'i': case 'I': return sizeof(int); - case 'l': case 'L': return sizeof(long); - #ifdef HAVE_LONG_LONG - case 'q': case 'Q': return sizeof(PY_LONG_LONG); - #endif - case 'f': return sizeof(float) * (is_complex ? 2 : 1); - case 'd': return sizeof(double) * (is_complex ? 2 : 1); - case 'g': return sizeof(long double) * (is_complex ? 2 : 1); - case 'O': case 'P': return sizeof(void*); - default: { - __Pyx_BufFmt_RaiseUnexpectedChar(ch); - return 0; - } - } -} -typedef struct { char c; short x; } __Pyx_st_short; -typedef struct { char c; int x; } __Pyx_st_int; -typedef struct { char c; long x; } __Pyx_st_long; -typedef struct { char c; float x; } __Pyx_st_float; -typedef struct { char c; double x; } __Pyx_st_double; -typedef struct { char c; long double x; } __Pyx_st_longdouble; -typedef struct { char c; void *x; } __Pyx_st_void_p; -#ifdef HAVE_LONG_LONG -typedef struct { char c; PY_LONG_LONG x; } __Pyx_st_longlong; -#endif -static size_t __Pyx_BufFmt_TypeCharToAlignment(char ch, int is_complex) { - CYTHON_UNUSED_VAR(is_complex); - switch (ch) { - case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; - case 'h': case 'H': return sizeof(__Pyx_st_short) - sizeof(short); - case 'i': case 'I': return sizeof(__Pyx_st_int) - sizeof(int); - case 'l': case 'L': return sizeof(__Pyx_st_long) - sizeof(long); -#ifdef HAVE_LONG_LONG - case 'q': case 'Q': return sizeof(__Pyx_st_longlong) - sizeof(PY_LONG_LONG); -#endif - case 'f': return sizeof(__Pyx_st_float) - sizeof(float); - case 'd': return sizeof(__Pyx_st_double) - sizeof(double); - case 'g': return sizeof(__Pyx_st_longdouble) - sizeof(long double); - case 'P': case 'O': return sizeof(__Pyx_st_void_p) - sizeof(void*); - default: - __Pyx_BufFmt_RaiseUnexpectedChar(ch); - return 0; - } -} -/* These are for computing the padding at the end of the struct to align - on the first member of the struct. This will probably the same as above, - but we don't have any guarantees. - */ -typedef struct { short x; char c; } __Pyx_pad_short; -typedef struct { int x; char c; } __Pyx_pad_int; -typedef struct { long x; char c; } __Pyx_pad_long; -typedef struct { float x; char c; } __Pyx_pad_float; -typedef struct { double x; char c; } __Pyx_pad_double; -typedef struct { long double x; char c; } __Pyx_pad_longdouble; -typedef struct { void *x; char c; } __Pyx_pad_void_p; -#ifdef HAVE_LONG_LONG -typedef struct { PY_LONG_LONG x; char c; } __Pyx_pad_longlong; -#endif -static size_t __Pyx_BufFmt_TypeCharToPadding(char ch, int is_complex) { - CYTHON_UNUSED_VAR(is_complex); - switch (ch) { - case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; - case 'h': case 'H': return sizeof(__Pyx_pad_short) - sizeof(short); - case 'i': case 'I': return sizeof(__Pyx_pad_int) - sizeof(int); - case 'l': case 'L': return sizeof(__Pyx_pad_long) - sizeof(long); -#ifdef HAVE_LONG_LONG - case 'q': case 'Q': return sizeof(__Pyx_pad_longlong) - sizeof(PY_LONG_LONG); -#endif - case 'f': return sizeof(__Pyx_pad_float) - sizeof(float); - case 'd': return sizeof(__Pyx_pad_double) - sizeof(double); - case 'g': return sizeof(__Pyx_pad_longdouble) - sizeof(long double); - case 'P': case 'O': return sizeof(__Pyx_pad_void_p) - sizeof(void*); - default: - __Pyx_BufFmt_RaiseUnexpectedChar(ch); - return 0; - } -} -static char __Pyx_BufFmt_TypeCharToGroup(char ch, int is_complex) { - switch (ch) { - case 'c': - return 'H'; - case 'b': case 'h': case 'i': - case 'l': case 'q': case 's': case 'p': - return 'I'; - case '?': case 'B': case 'H': case 'I': case 'L': case 'Q': - return 'U'; - case 'f': case 'd': case 'g': - return (is_complex ? 'C' : 'R'); - case 'O': - return 'O'; - case 'P': - return 'P'; - default: { - __Pyx_BufFmt_RaiseUnexpectedChar(ch); - return 0; - } - } -} -static void __Pyx_BufFmt_RaiseExpected(__Pyx_BufFmt_Context* ctx) { - if (ctx->head == NULL || ctx->head->field == &ctx->root) { - const char* expected; - const char* quote; - if (ctx->head == NULL) { - expected = "end"; - quote = ""; - } else { - expected = ctx->head->field->type->name; - quote = "'"; - } - PyErr_Format(PyExc_ValueError, - "Buffer dtype mismatch, expected %s%s%s but got %s", - quote, expected, quote, - __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex)); - } else { - __Pyx_StructField* field = ctx->head->field; - __Pyx_StructField* parent = (ctx->head - 1)->field; - PyErr_Format(PyExc_ValueError, - "Buffer dtype mismatch, expected '%s' but got %s in '%s.%s'", - field->type->name, __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex), - parent->type->name, field->name); - } -} -static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) { - char group; - size_t size, offset, arraysize = 1; - if (ctx->enc_type == 0) return 0; - if (ctx->head->field->type->arraysize[0]) { - int i, ndim = 0; - if (ctx->enc_type == 's' || ctx->enc_type == 'p') { - ctx->is_valid_array = ctx->head->field->type->ndim == 1; - ndim = 1; - if (ctx->enc_count != ctx->head->field->type->arraysize[0]) { - PyErr_Format(PyExc_ValueError, - "Expected a dimension of size %zu, got %zu", - ctx->head->field->type->arraysize[0], ctx->enc_count); - return -1; - } - } - if (!ctx->is_valid_array) { - PyErr_Format(PyExc_ValueError, "Expected %d dimensions, got %d", - ctx->head->field->type->ndim, ndim); - return -1; - } - for (i = 0; i < ctx->head->field->type->ndim; i++) { - arraysize *= ctx->head->field->type->arraysize[i]; - } - ctx->is_valid_array = 0; - ctx->enc_count = 1; - } - group = __Pyx_BufFmt_TypeCharToGroup(ctx->enc_type, ctx->is_complex); - do { - __Pyx_StructField* field = ctx->head->field; - __Pyx_TypeInfo* type = field->type; - if (ctx->enc_packmode == '@' || ctx->enc_packmode == '^') { - size = __Pyx_BufFmt_TypeCharToNativeSize(ctx->enc_type, ctx->is_complex); - } else { - size = __Pyx_BufFmt_TypeCharToStandardSize(ctx->enc_type, ctx->is_complex); - } - if (ctx->enc_packmode == '@') { - size_t align_at = __Pyx_BufFmt_TypeCharToAlignment(ctx->enc_type, ctx->is_complex); - size_t align_mod_offset; - if (align_at == 0) return -1; - align_mod_offset = ctx->fmt_offset % align_at; - if (align_mod_offset > 0) ctx->fmt_offset += align_at - align_mod_offset; - if (ctx->struct_alignment == 0) - ctx->struct_alignment = __Pyx_BufFmt_TypeCharToPadding(ctx->enc_type, - ctx->is_complex); - } - if (type->size != size || type->typegroup != group) { - if (type->typegroup == 'C' && type->fields != NULL) { - size_t parent_offset = ctx->head->parent_offset + field->offset; - ++ctx->head; - ctx->head->field = type->fields; - ctx->head->parent_offset = parent_offset; - continue; - } - if ((type->typegroup == 'H' || group == 'H') && type->size == size) { - } else { - __Pyx_BufFmt_RaiseExpected(ctx); - return -1; - } - } - offset = ctx->head->parent_offset + field->offset; - if (ctx->fmt_offset != offset) { - PyErr_Format(PyExc_ValueError, - "Buffer dtype mismatch; next field is at offset %" CYTHON_FORMAT_SSIZE_T "d but %" CYTHON_FORMAT_SSIZE_T "d expected", - (Py_ssize_t)ctx->fmt_offset, (Py_ssize_t)offset); - return -1; - } - ctx->fmt_offset += size; - if (arraysize) - ctx->fmt_offset += (arraysize - 1) * size; - --ctx->enc_count; - while (1) { - if (field == &ctx->root) { - ctx->head = NULL; - if (ctx->enc_count != 0) { - __Pyx_BufFmt_RaiseExpected(ctx); - return -1; - } - break; - } - ctx->head->field = ++field; - if (field->type == NULL) { - --ctx->head; - field = ctx->head->field; - continue; - } else if (field->type->typegroup == 'S') { - size_t parent_offset = ctx->head->parent_offset + field->offset; - if (field->type->fields->type == NULL) continue; - field = field->type->fields; - ++ctx->head; - ctx->head->field = field; - ctx->head->parent_offset = parent_offset; - break; - } else { - break; - } - } - } while (ctx->enc_count); - ctx->enc_type = 0; - ctx->is_complex = 0; - return 0; -} -static int -__pyx_buffmt_parse_array(__Pyx_BufFmt_Context* ctx, const char** tsp) -{ - const char *ts = *tsp; - int i = 0, number, ndim; - ++ts; - if (ctx->new_count != 1) { - PyErr_SetString(PyExc_ValueError, - "Cannot handle repeated arrays in format string"); - return -1; - } - if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return -1; - ndim = ctx->head->field->type->ndim; - while (*ts && *ts != ')') { - switch (*ts) { - case ' ': case '\f': case '\r': case '\n': case '\t': case '\v': continue; - default: break; - } - number = __Pyx_BufFmt_ExpectNumber(&ts); - if (number == -1) return -1; - if (i < ndim && (size_t) number != ctx->head->field->type->arraysize[i]) { - PyErr_Format(PyExc_ValueError, - "Expected a dimension of size %zu, got %d", - ctx->head->field->type->arraysize[i], number); - return -1; - } - if (*ts != ',' && *ts != ')') { - PyErr_Format(PyExc_ValueError, - "Expected a comma in format string, got '%c'", *ts); - return -1; - } - if (*ts == ',') ts++; - i++; - } - if (i != ndim) { - PyErr_Format(PyExc_ValueError, "Expected %d dimension(s), got %d", - ctx->head->field->type->ndim, i); - return -1; - } - if (!*ts) { - PyErr_SetString(PyExc_ValueError, - "Unexpected end of format string, expected ')'"); - return -1; - } - ctx->is_valid_array = 1; - ctx->new_count = 1; - *tsp = ++ts; - return 0; -} -static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts) { - int got_Z = 0; - while (1) { - switch(*ts) { - case 0: - if (ctx->enc_type != 0 && ctx->head == NULL) { - __Pyx_BufFmt_RaiseExpected(ctx); - return NULL; - } - if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; - if (ctx->head != NULL) { - __Pyx_BufFmt_RaiseExpected(ctx); - return NULL; - } - return ts; - case ' ': - case '\r': - case '\n': - ++ts; - break; - case '<': - if (!__Pyx_Is_Little_Endian()) { - PyErr_SetString(PyExc_ValueError, "Little-endian buffer not supported on big-endian compiler"); - return NULL; - } - ctx->new_packmode = '='; - ++ts; - break; - case '>': - case '!': - if (__Pyx_Is_Little_Endian()) { - PyErr_SetString(PyExc_ValueError, "Big-endian buffer not supported on little-endian compiler"); - return NULL; - } - ctx->new_packmode = '='; - ++ts; - break; - case '=': - case '@': - case '^': - ctx->new_packmode = *ts++; - break; - case 'T': - { - const char* ts_after_sub; - size_t i, struct_count = ctx->new_count; - size_t struct_alignment = ctx->struct_alignment; - ctx->new_count = 1; - ++ts; - if (*ts != '{') { - PyErr_SetString(PyExc_ValueError, "Buffer acquisition: Expected '{' after 'T'"); - return NULL; - } - if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; - ctx->enc_type = 0; - ctx->enc_count = 0; - ctx->struct_alignment = 0; - ++ts; - ts_after_sub = ts; - for (i = 0; i != struct_count; ++i) { - ts_after_sub = __Pyx_BufFmt_CheckString(ctx, ts); - if (!ts_after_sub) return NULL; - } - ts = ts_after_sub; - if (struct_alignment) ctx->struct_alignment = struct_alignment; - } - break; - case '}': - { - size_t alignment = ctx->struct_alignment; - ++ts; - if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; - ctx->enc_type = 0; - if (alignment && ctx->fmt_offset % alignment) { - ctx->fmt_offset += alignment - (ctx->fmt_offset % alignment); - } - } - return ts; - case 'x': - if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; - ctx->fmt_offset += ctx->new_count; - ctx->new_count = 1; - ctx->enc_count = 0; - ctx->enc_type = 0; - ctx->enc_packmode = ctx->new_packmode; - ++ts; - break; - case 'Z': - got_Z = 1; - ++ts; - if (*ts != 'f' && *ts != 'd' && *ts != 'g') { - __Pyx_BufFmt_RaiseUnexpectedChar('Z'); - return NULL; - } - CYTHON_FALLTHROUGH; - case '?': case 'c': case 'b': case 'B': case 'h': case 'H': case 'i': case 'I': - case 'l': case 'L': case 'q': case 'Q': - case 'f': case 'd': case 'g': - case 'O': case 'p': - if ((ctx->enc_type == *ts) && (got_Z == ctx->is_complex) && - (ctx->enc_packmode == ctx->new_packmode) && (!ctx->is_valid_array)) { - ctx->enc_count += ctx->new_count; - ctx->new_count = 1; - got_Z = 0; - ++ts; - break; - } - CYTHON_FALLTHROUGH; - case 's': - if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; - ctx->enc_count = ctx->new_count; - ctx->enc_packmode = ctx->new_packmode; - ctx->enc_type = *ts; - ctx->is_complex = got_Z; - ++ts; - ctx->new_count = 1; - got_Z = 0; - break; - case ':': - ++ts; - while(*ts != ':') ++ts; - ++ts; - break; - case '(': - if (__pyx_buffmt_parse_array(ctx, &ts) < 0) return NULL; - break; - default: - { - int number = __Pyx_BufFmt_ExpectNumber(&ts); - if (number == -1) return NULL; - ctx->new_count = (size_t)number; - } - } - } -} - -/* TypeInfoCompare */ - static int -__pyx_typeinfo_cmp(__Pyx_TypeInfo *a, __Pyx_TypeInfo *b) -{ - int i; - if (!a || !b) - return 0; - if (a == b) - return 1; - if (a->size != b->size || a->typegroup != b->typegroup || - a->is_unsigned != b->is_unsigned || a->ndim != b->ndim) { - if (a->typegroup == 'H' || b->typegroup == 'H') { - return a->size == b->size; - } else { - return 0; - } - } - if (a->ndim) { - for (i = 0; i < a->ndim; i++) - if (a->arraysize[i] != b->arraysize[i]) - return 0; - } - if (a->typegroup == 'S') { - if (a->flags != b->flags) - return 0; - if (a->fields || b->fields) { - if (!(a->fields && b->fields)) - return 0; - for (i = 0; a->fields[i].type && b->fields[i].type; i++) { - __Pyx_StructField *field_a = a->fields + i; - __Pyx_StructField *field_b = b->fields + i; - if (field_a->offset != field_b->offset || - !__pyx_typeinfo_cmp(field_a->type, field_b->type)) - return 0; - } - return !a->fields[i].type && !b->fields[i].type; - } - } - return 1; -} - -/* MemviewSliceValidateAndInit */ - static int -__pyx_check_strides(Py_buffer *buf, int dim, int ndim, int spec) -{ - if (buf->shape[dim] <= 1) - return 1; - if (buf->strides) { - if (spec & __Pyx_MEMVIEW_CONTIG) { - if (spec & (__Pyx_MEMVIEW_PTR|__Pyx_MEMVIEW_FULL)) { - if (unlikely(buf->strides[dim] != sizeof(void *))) { - PyErr_Format(PyExc_ValueError, - "Buffer is not indirectly contiguous " - "in dimension %d.", dim); - goto fail; - } - } else if (unlikely(buf->strides[dim] != buf->itemsize)) { - PyErr_SetString(PyExc_ValueError, - "Buffer and memoryview are not contiguous " - "in the same dimension."); - goto fail; - } - } - if (spec & __Pyx_MEMVIEW_FOLLOW) { - Py_ssize_t stride = buf->strides[dim]; - if (stride < 0) - stride = -stride; - if (unlikely(stride < buf->itemsize)) { - PyErr_SetString(PyExc_ValueError, - "Buffer and memoryview are not contiguous " - "in the same dimension."); - goto fail; - } - } - } else { - if (unlikely(spec & __Pyx_MEMVIEW_CONTIG && dim != ndim - 1)) { - PyErr_Format(PyExc_ValueError, - "C-contiguous buffer is not contiguous in " - "dimension %d", dim); - goto fail; - } else if (unlikely(spec & (__Pyx_MEMVIEW_PTR))) { - PyErr_Format(PyExc_ValueError, - "C-contiguous buffer is not indirect in " - "dimension %d", dim); - goto fail; - } else if (unlikely(buf->suboffsets)) { - PyErr_SetString(PyExc_ValueError, - "Buffer exposes suboffsets but no strides"); - goto fail; - } - } - return 1; -fail: - return 0; -} -static int -__pyx_check_suboffsets(Py_buffer *buf, int dim, int ndim, int spec) -{ - CYTHON_UNUSED_VAR(ndim); - if (spec & __Pyx_MEMVIEW_DIRECT) { - if (unlikely(buf->suboffsets && buf->suboffsets[dim] >= 0)) { - PyErr_Format(PyExc_ValueError, - "Buffer not compatible with direct access " - "in dimension %d.", dim); - goto fail; - } - } - if (spec & __Pyx_MEMVIEW_PTR) { - if (unlikely(!buf->suboffsets || (buf->suboffsets[dim] < 0))) { - PyErr_Format(PyExc_ValueError, - "Buffer is not indirectly accessible " - "in dimension %d.", dim); - goto fail; - } - } - return 1; -fail: - return 0; -} -static int -__pyx_verify_contig(Py_buffer *buf, int ndim, int c_or_f_flag) -{ - int i; - if (c_or_f_flag & __Pyx_IS_F_CONTIG) { - Py_ssize_t stride = 1; - for (i = 0; i < ndim; i++) { - if (unlikely(stride * buf->itemsize != buf->strides[i] && buf->shape[i] > 1)) { - PyErr_SetString(PyExc_ValueError, - "Buffer not fortran contiguous."); - goto fail; - } - stride = stride * buf->shape[i]; - } - } else if (c_or_f_flag & __Pyx_IS_C_CONTIG) { - Py_ssize_t stride = 1; - for (i = ndim - 1; i >- 1; i--) { - if (unlikely(stride * buf->itemsize != buf->strides[i] && buf->shape[i] > 1)) { - PyErr_SetString(PyExc_ValueError, - "Buffer not C contiguous."); - goto fail; - } - stride = stride * buf->shape[i]; - } - } - return 1; -fail: - return 0; -} -static int __Pyx_ValidateAndInit_memviewslice( - int *axes_specs, - int c_or_f_flag, - int buf_flags, - int ndim, - __Pyx_TypeInfo *dtype, - __Pyx_BufFmt_StackElem stack[], - __Pyx_memviewslice *memviewslice, - PyObject *original_obj) -{ - struct __pyx_memoryview_obj *memview, *new_memview; - __Pyx_RefNannyDeclarations - Py_buffer *buf; - int i, spec = 0, retval = -1; - __Pyx_BufFmt_Context ctx; - int from_memoryview = __pyx_memoryview_check(original_obj); - __Pyx_RefNannySetupContext("ValidateAndInit_memviewslice", 0); - if (from_memoryview && __pyx_typeinfo_cmp(dtype, ((struct __pyx_memoryview_obj *) - original_obj)->typeinfo)) { - memview = (struct __pyx_memoryview_obj *) original_obj; - new_memview = NULL; - } else { - memview = (struct __pyx_memoryview_obj *) __pyx_memoryview_new( - original_obj, buf_flags, 0, dtype); - new_memview = memview; - if (unlikely(!memview)) - goto fail; - } - buf = &memview->view; - if (unlikely(buf->ndim != ndim)) { - PyErr_Format(PyExc_ValueError, - "Buffer has wrong number of dimensions (expected %d, got %d)", - ndim, buf->ndim); - goto fail; - } - if (new_memview) { - __Pyx_BufFmt_Init(&ctx, stack, dtype); - if (unlikely(!__Pyx_BufFmt_CheckString(&ctx, buf->format))) goto fail; - } - if (unlikely((unsigned) buf->itemsize != dtype->size)) { - PyErr_Format(PyExc_ValueError, - "Item size of buffer (%" CYTHON_FORMAT_SSIZE_T "u byte%s) " - "does not match size of '%s' (%" CYTHON_FORMAT_SSIZE_T "u byte%s)", - buf->itemsize, - (buf->itemsize > 1) ? "s" : "", - dtype->name, - dtype->size, - (dtype->size > 1) ? "s" : ""); - goto fail; - } - if (buf->len > 0) { - for (i = 0; i < ndim; i++) { - spec = axes_specs[i]; - if (unlikely(!__pyx_check_strides(buf, i, ndim, spec))) - goto fail; - if (unlikely(!__pyx_check_suboffsets(buf, i, ndim, spec))) - goto fail; - } - if (unlikely(buf->strides && !__pyx_verify_contig(buf, ndim, c_or_f_flag))) - goto fail; - } - if (unlikely(__Pyx_init_memviewslice(memview, ndim, memviewslice, - new_memview != NULL) == -1)) { - goto fail; - } - retval = 0; - goto no_fail; -fail: - Py_XDECREF(new_memview); - retval = -1; -no_fail: - __Pyx_RefNannyFinishContext(); - return retval; -} - -/* ObjectToMemviewSlice */ - static CYTHON_INLINE __Pyx_memviewslice __Pyx_PyObject_to_MemoryviewSlice_dc_unsigned_char__const__(PyObject *obj, int writable_flag) { - __Pyx_memviewslice result = { 0, 0, { 0 }, { 0 }, { 0 } }; - __Pyx_BufFmt_StackElem stack[1]; - int axes_specs[] = { (__Pyx_MEMVIEW_DIRECT | __Pyx_MEMVIEW_CONTIG) }; - int retcode; - if (obj == Py_None) { - result.memview = (struct __pyx_memoryview_obj *) Py_None; - return result; - } - retcode = __Pyx_ValidateAndInit_memviewslice(axes_specs, __Pyx_IS_C_CONTIG, - (PyBUF_C_CONTIGUOUS | PyBUF_FORMAT) | writable_flag, 1, - &__Pyx_TypeInfo_unsigned_char__const__, stack, - &result, obj); - if (unlikely(retcode == -1)) - goto __pyx_fail; - return result; -__pyx_fail: - result.memview = NULL; - result.data = NULL; - return result; -} - -/* MemviewDtypeToObject */ - static CYTHON_INLINE PyObject *__pyx_memview_get_unsigned_char__const__(const char *itemp) { - return (PyObject *) __Pyx_PyInt_From_unsigned_char(*(unsigned char const *) itemp); -} - -/* MemviewSliceCopyTemplate */ - static __Pyx_memviewslice -__pyx_memoryview_copy_new_contig(const __Pyx_memviewslice *from_mvs, - const char *mode, int ndim, - size_t sizeof_dtype, int contig_flag, - int dtype_is_object) -{ - __Pyx_RefNannyDeclarations - int i; - __Pyx_memviewslice new_mvs = { 0, 0, { 0 }, { 0 }, { 0 } }; - struct __pyx_memoryview_obj *from_memview = from_mvs->memview; - Py_buffer *buf = &from_memview->view; - PyObject *shape_tuple = NULL; - PyObject *temp_int = NULL; - struct __pyx_array_obj *array_obj = NULL; - struct __pyx_memoryview_obj *memview_obj = NULL; - __Pyx_RefNannySetupContext("__pyx_memoryview_copy_new_contig", 0); - for (i = 0; i < ndim; i++) { - if (unlikely(from_mvs->suboffsets[i] >= 0)) { - PyErr_Format(PyExc_ValueError, "Cannot copy memoryview slice with " - "indirect dimensions (axis %d)", i); - goto fail; - } - } - shape_tuple = PyTuple_New(ndim); - if (unlikely(!shape_tuple)) { - goto fail; - } - __Pyx_GOTREF(shape_tuple); - for(i = 0; i < ndim; i++) { - temp_int = PyInt_FromSsize_t(from_mvs->shape[i]); - if(unlikely(!temp_int)) { - goto fail; - } else { - PyTuple_SET_ITEM(shape_tuple, i, temp_int); - temp_int = NULL; - } - } - array_obj = __pyx_array_new(shape_tuple, sizeof_dtype, buf->format, (char *) mode, NULL); - if (unlikely(!array_obj)) { - goto fail; - } - __Pyx_GOTREF(array_obj); - memview_obj = (struct __pyx_memoryview_obj *) __pyx_memoryview_new( - (PyObject *) array_obj, contig_flag, - dtype_is_object, - from_mvs->memview->typeinfo); - if (unlikely(!memview_obj)) - goto fail; - if (unlikely(__Pyx_init_memviewslice(memview_obj, ndim, &new_mvs, 1) < 0)) - goto fail; - if (unlikely(__pyx_memoryview_copy_contents(*from_mvs, new_mvs, ndim, ndim, - dtype_is_object) < 0)) - goto fail; - goto no_fail; -fail: - __Pyx_XDECREF(new_mvs.memview); - new_mvs.memview = NULL; - new_mvs.data = NULL; -no_fail: - __Pyx_XDECREF(shape_tuple); - __Pyx_XDECREF(temp_int); - __Pyx_XDECREF(array_obj); - __Pyx_RefNannyFinishContext(); - return new_mvs; -} - -/* MemviewSliceInit */ - static int -__Pyx_init_memviewslice(struct __pyx_memoryview_obj *memview, - int ndim, - __Pyx_memviewslice *memviewslice, - int memview_is_new_reference) -{ - __Pyx_RefNannyDeclarations - int i, retval=-1; - Py_buffer *buf = &memview->view; - __Pyx_RefNannySetupContext("init_memviewslice", 0); - if (unlikely(memviewslice->memview || memviewslice->data)) { - PyErr_SetString(PyExc_ValueError, - "memviewslice is already initialized!"); - goto fail; - } - if (buf->strides) { - for (i = 0; i < ndim; i++) { - memviewslice->strides[i] = buf->strides[i]; - } - } else { - Py_ssize_t stride = buf->itemsize; - for (i = ndim - 1; i >= 0; i--) { - memviewslice->strides[i] = stride; - stride *= buf->shape[i]; - } - } - for (i = 0; i < ndim; i++) { - memviewslice->shape[i] = buf->shape[i]; - if (buf->suboffsets) { - memviewslice->suboffsets[i] = buf->suboffsets[i]; - } else { - memviewslice->suboffsets[i] = -1; - } - } - memviewslice->memview = memview; - memviewslice->data = (char *)buf->buf; - if (__pyx_add_acquisition_count(memview) == 0 && !memview_is_new_reference) { - Py_INCREF(memview); - } - retval = 0; - goto no_fail; -fail: - memviewslice->memview = 0; - memviewslice->data = 0; - retval = -1; -no_fail: - __Pyx_RefNannyFinishContext(); - return retval; -} -#ifndef Py_NO_RETURN -#define Py_NO_RETURN -#endif -static void __pyx_fatalerror(const char *fmt, ...) Py_NO_RETURN { - va_list vargs; - char msg[200]; -#if PY_VERSION_HEX >= 0x030A0000 || defined(HAVE_STDARG_PROTOTYPES) - va_start(vargs, fmt); -#else - va_start(vargs); -#endif - vsnprintf(msg, 200, fmt, vargs); - va_end(vargs); - Py_FatalError(msg); -} -static CYTHON_INLINE int -__pyx_add_acquisition_count_locked(__pyx_atomic_int_type *acquisition_count, - PyThread_type_lock lock) -{ - int result; - PyThread_acquire_lock(lock, 1); - result = (*acquisition_count)++; - PyThread_release_lock(lock); - return result; -} -static CYTHON_INLINE int -__pyx_sub_acquisition_count_locked(__pyx_atomic_int_type *acquisition_count, - PyThread_type_lock lock) -{ - int result; - PyThread_acquire_lock(lock, 1); - result = (*acquisition_count)--; - PyThread_release_lock(lock); - return result; -} -static CYTHON_INLINE void -__Pyx_INC_MEMVIEW(__Pyx_memviewslice *memslice, int have_gil, int lineno) -{ - __pyx_nonatomic_int_type old_acquisition_count; - struct __pyx_memoryview_obj *memview = memslice->memview; - if (unlikely(!memview || (PyObject *) memview == Py_None)) { - return; - } - old_acquisition_count = __pyx_add_acquisition_count(memview); - if (unlikely(old_acquisition_count <= 0)) { - if (likely(old_acquisition_count == 0)) { - if (have_gil) { - Py_INCREF((PyObject *) memview); - } else { - PyGILState_STATE _gilstate = PyGILState_Ensure(); - Py_INCREF((PyObject *) memview); - PyGILState_Release(_gilstate); - } - } else { - __pyx_fatalerror("Acquisition count is %d (line %d)", - old_acquisition_count+1, lineno); - } - } -} -static CYTHON_INLINE void __Pyx_XCLEAR_MEMVIEW(__Pyx_memviewslice *memslice, - int have_gil, int lineno) { - __pyx_nonatomic_int_type old_acquisition_count; - struct __pyx_memoryview_obj *memview = memslice->memview; - if (unlikely(!memview || (PyObject *) memview == Py_None)) { - memslice->memview = NULL; - return; - } - old_acquisition_count = __pyx_sub_acquisition_count(memview); - memslice->data = NULL; - if (likely(old_acquisition_count > 1)) { - memslice->memview = NULL; - } else if (likely(old_acquisition_count == 1)) { - if (have_gil) { - Py_CLEAR(memslice->memview); - } else { - PyGILState_STATE _gilstate = PyGILState_Ensure(); - Py_CLEAR(memslice->memview); - PyGILState_Release(_gilstate); - } - } else { - __pyx_fatalerror("Acquisition count is %d (line %d)", - old_acquisition_count-1, lineno); - } -} - -/* CIntFromPy */ - static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const int neg_one = (int) -1, const_zero = (int) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x))) { - if ((sizeof(int) < sizeof(long))) { - __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x)) - } else { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - goto raise_neg_overflow; - } - return (int) val; - } - } else -#endif - if (likely(PyLong_Check(x))) { - if (is_unsigned) { -#if CYTHON_USE_PYLONG_INTERNALS - if (unlikely(__Pyx_PyLong_IsNeg(x))) { - goto raise_neg_overflow; - } else if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_DigitCount(x)) { - case 2: - if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) >= 2 * PyLong_SHIFT)) { - return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); - } - } - break; - case 3: - if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) >= 3 * PyLong_SHIFT)) { - return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); - } - } - break; - case 4: - if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) >= 4 * PyLong_SHIFT)) { - return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); - } - } - break; - } - } -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 - if (unlikely(Py_SIZE(x) < 0)) { - goto raise_neg_overflow; - } -#else - { - int result = PyObject_RichCompareBool(x, Py_False, Py_LT); - if (unlikely(result < 0)) - return (int) -1; - if (unlikely(result == 1)) - goto raise_neg_overflow; - } -#endif - if ((sizeof(int) <= sizeof(unsigned long))) { - __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(int) <= sizeof(unsigned PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) -#endif - } - } else { -#if CYTHON_USE_PYLONG_INTERNALS - if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_SignedDigitCount(x)) { - case -2: - if ((8 * sizeof(int) - 1 > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { - return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case 2: - if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { - return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case -3: - if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { - return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case 3: - if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { - return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case -4: - if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { - return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - case 4: - if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { - return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); - } - } - break; - } - } -#endif - if ((sizeof(int) <= sizeof(long))) { - __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(int) <= sizeof(PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x)) -#endif - } - } - { - int val; - PyObject *v = __Pyx_PyNumber_IntOrLong(x); -#if PY_MAJOR_VERSION < 3 - if (likely(v) && !PyLong_Check(v)) { - PyObject *tmp = v; - v = PyNumber_Long(tmp); - Py_DECREF(tmp); - } -#endif - if (likely(v)) { - int ret = -1; -#if !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) - int one = 1; int is_little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&val; - ret = _PyLong_AsByteArray((PyLongObject *)v, - bytes, sizeof(val), - is_little, !is_unsigned); -#else - PyObject *stepval = NULL, *mask = NULL, *shift = NULL; - int bits, remaining_bits, is_negative = 0; - long idigit; - int chunk_size = (sizeof(long) < 8) ? 30 : 62; - if (unlikely(!PyLong_CheckExact(v))) { - PyObject *tmp = v; - v = PyNumber_Long(v); - assert(PyLong_CheckExact(v)); - Py_DECREF(tmp); - if (unlikely(!v)) return (int) -1; - } -#if CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030B0000 - if (Py_SIZE(x) == 0) - return (int) 0; - is_negative = Py_SIZE(x) < 0; -#else - { - int result = PyObject_RichCompareBool(x, Py_False, Py_LT); - if (unlikely(result < 0)) - return (int) -1; - is_negative = result == 1; - } -#endif - if (is_unsigned && unlikely(is_negative)) { - goto raise_neg_overflow; - } else if (is_negative) { - stepval = PyNumber_Invert(v); - if (unlikely(!stepval)) - return (int) -1; - } else { - stepval = __Pyx_NewRef(v); - } - val = (int) 0; - mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; - shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; - for (bits = 0; bits < (int) sizeof(int) * 8 - chunk_size; bits += chunk_size) { - PyObject *tmp, *digit; - digit = PyNumber_And(stepval, mask); - if (unlikely(!digit)) goto done; - idigit = PyLong_AsLong(digit); - Py_DECREF(digit); - if (unlikely(idigit < 0)) goto done; - tmp = PyNumber_Rshift(stepval, shift); - if (unlikely(!tmp)) goto done; - Py_DECREF(stepval); stepval = tmp; - val |= ((int) idigit) << bits; - #if CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030B0000 - if (Py_SIZE(stepval) == 0) - goto unpacking_done; - #endif - } - idigit = PyLong_AsLong(stepval); - if (unlikely(idigit < 0)) goto done; - remaining_bits = ((int) sizeof(int) * 8) - bits - (is_unsigned ? 0 : 1); - if (unlikely(idigit >= (1L << remaining_bits))) - goto raise_overflow; - val |= ((int) idigit) << bits; - #if CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030B0000 - unpacking_done: - #endif - if (!is_unsigned) { - if (unlikely(val & (((int) 1) << (sizeof(int) * 8 - 1)))) - goto raise_overflow; - if (is_negative) - val = ~val; - } - ret = 0; - done: - Py_XDECREF(shift); - Py_XDECREF(mask); - Py_XDECREF(stepval); -#endif - Py_DECREF(v); - if (likely(!ret)) - return val; - } - return (int) -1; - } - } else { - int val; - PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); - if (!tmp) return (int) -1; - val = __Pyx_PyInt_As_int(tmp); - Py_DECREF(tmp); - return val; - } -raise_overflow: - PyErr_SetString(PyExc_OverflowError, - "value too large to convert to int"); - return (int) -1; -raise_neg_overflow: - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to int"); - return (int) -1; -} - -/* CIntFromPy */ - static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *x) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const size_t neg_one = (size_t) -1, const_zero = (size_t) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x))) { - if ((sizeof(size_t) < sizeof(long))) { - __PYX_VERIFY_RETURN_INT(size_t, long, PyInt_AS_LONG(x)) - } else { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - goto raise_neg_overflow; - } - return (size_t) val; - } - } else -#endif - if (likely(PyLong_Check(x))) { - if (is_unsigned) { -#if CYTHON_USE_PYLONG_INTERNALS - if (unlikely(__Pyx_PyLong_IsNeg(x))) { - goto raise_neg_overflow; - } else if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(size_t, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_DigitCount(x)) { - case 2: - if ((8 * sizeof(size_t) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) >= 2 * PyLong_SHIFT)) { - return (size_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - } - break; - case 3: - if ((8 * sizeof(size_t) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) >= 3 * PyLong_SHIFT)) { - return (size_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - } - break; - case 4: - if ((8 * sizeof(size_t) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) >= 4 * PyLong_SHIFT)) { - return (size_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - } - break; - } - } -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 - if (unlikely(Py_SIZE(x) < 0)) { - goto raise_neg_overflow; - } -#else - { - int result = PyObject_RichCompareBool(x, Py_False, Py_LT); - if (unlikely(result < 0)) - return (size_t) -1; - if (unlikely(result == 1)) - goto raise_neg_overflow; - } -#endif - if ((sizeof(size_t) <= sizeof(unsigned long))) { - __PYX_VERIFY_RETURN_INT_EXC(size_t, unsigned long, PyLong_AsUnsignedLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(size_t) <= sizeof(unsigned PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(size_t, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) -#endif - } - } else { -#if CYTHON_USE_PYLONG_INTERNALS - if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(size_t, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_SignedDigitCount(x)) { - case -2: - if ((8 * sizeof(size_t) - 1 > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) - 1 > 2 * PyLong_SHIFT)) { - return (size_t) (((size_t)-1)*(((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); - } - } - break; - case 2: - if ((8 * sizeof(size_t) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) - 1 > 2 * PyLong_SHIFT)) { - return (size_t) ((((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); - } - } - break; - case -3: - if ((8 * sizeof(size_t) - 1 > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) - 1 > 3 * PyLong_SHIFT)) { - return (size_t) (((size_t)-1)*(((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); - } - } - break; - case 3: - if ((8 * sizeof(size_t) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) - 1 > 3 * PyLong_SHIFT)) { - return (size_t) ((((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); - } - } - break; - case -4: - if ((8 * sizeof(size_t) - 1 > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) - 1 > 4 * PyLong_SHIFT)) { - return (size_t) (((size_t)-1)*(((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); - } - } - break; - case 4: - if ((8 * sizeof(size_t) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(size_t) - 1 > 4 * PyLong_SHIFT)) { - return (size_t) ((((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]))); - } - } - break; - } - } -#endif - if ((sizeof(size_t) <= sizeof(long))) { - __PYX_VERIFY_RETURN_INT_EXC(size_t, long, PyLong_AsLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(size_t) <= sizeof(PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(size_t, PY_LONG_LONG, PyLong_AsLongLong(x)) -#endif - } - } - { - size_t val; - PyObject *v = __Pyx_PyNumber_IntOrLong(x); -#if PY_MAJOR_VERSION < 3 - if (likely(v) && !PyLong_Check(v)) { - PyObject *tmp = v; - v = PyNumber_Long(tmp); - Py_DECREF(tmp); - } -#endif - if (likely(v)) { - int ret = -1; -#if !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) - int one = 1; int is_little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&val; - ret = _PyLong_AsByteArray((PyLongObject *)v, - bytes, sizeof(val), - is_little, !is_unsigned); -#else - PyObject *stepval = NULL, *mask = NULL, *shift = NULL; - int bits, remaining_bits, is_negative = 0; - long idigit; - int chunk_size = (sizeof(long) < 8) ? 30 : 62; - if (unlikely(!PyLong_CheckExact(v))) { - PyObject *tmp = v; - v = PyNumber_Long(v); - assert(PyLong_CheckExact(v)); - Py_DECREF(tmp); - if (unlikely(!v)) return (size_t) -1; - } -#if CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030B0000 - if (Py_SIZE(x) == 0) - return (size_t) 0; - is_negative = Py_SIZE(x) < 0; -#else - { - int result = PyObject_RichCompareBool(x, Py_False, Py_LT); - if (unlikely(result < 0)) - return (size_t) -1; - is_negative = result == 1; - } -#endif - if (is_unsigned && unlikely(is_negative)) { - goto raise_neg_overflow; - } else if (is_negative) { - stepval = PyNumber_Invert(v); - if (unlikely(!stepval)) - return (size_t) -1; - } else { - stepval = __Pyx_NewRef(v); - } - val = (size_t) 0; - mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; - shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; - for (bits = 0; bits < (int) sizeof(size_t) * 8 - chunk_size; bits += chunk_size) { - PyObject *tmp, *digit; - digit = PyNumber_And(stepval, mask); - if (unlikely(!digit)) goto done; - idigit = PyLong_AsLong(digit); - Py_DECREF(digit); - if (unlikely(idigit < 0)) goto done; - tmp = PyNumber_Rshift(stepval, shift); - if (unlikely(!tmp)) goto done; - Py_DECREF(stepval); stepval = tmp; - val |= ((size_t) idigit) << bits; - #if CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030B0000 - if (Py_SIZE(stepval) == 0) - goto unpacking_done; - #endif - } - idigit = PyLong_AsLong(stepval); - if (unlikely(idigit < 0)) goto done; - remaining_bits = ((int) sizeof(size_t) * 8) - bits - (is_unsigned ? 0 : 1); - if (unlikely(idigit >= (1L << remaining_bits))) - goto raise_overflow; - val |= ((size_t) idigit) << bits; - #if CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030B0000 - unpacking_done: - #endif - if (!is_unsigned) { - if (unlikely(val & (((size_t) 1) << (sizeof(size_t) * 8 - 1)))) - goto raise_overflow; - if (is_negative) - val = ~val; - } - ret = 0; - done: - Py_XDECREF(shift); - Py_XDECREF(mask); - Py_XDECREF(stepval); -#endif - Py_DECREF(v); - if (likely(!ret)) - return val; - } - return (size_t) -1; - } - } else { - size_t val; - PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); - if (!tmp) return (size_t) -1; - val = __Pyx_PyInt_As_size_t(tmp); - Py_DECREF(tmp); - return val; - } -raise_overflow: - PyErr_SetString(PyExc_OverflowError, - "value too large to convert to size_t"); - return (size_t) -1; -raise_neg_overflow: - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to size_t"); - return (size_t) -1; -} - -/* CIntToPy */ - static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int64_t(int64_t value) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const int64_t neg_one = (int64_t) -1, const_zero = (int64_t) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; - if (is_unsigned) { - if (sizeof(int64_t) < sizeof(long)) { - return PyInt_FromLong((long) value); - } else if (sizeof(int64_t) <= sizeof(unsigned long)) { - return PyLong_FromUnsignedLong((unsigned long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(int64_t) <= sizeof(unsigned PY_LONG_LONG)) { - return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); -#endif - } - } else { - if (sizeof(int64_t) <= sizeof(long)) { - return PyInt_FromLong((long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(int64_t) <= sizeof(PY_LONG_LONG)) { - return PyLong_FromLongLong((PY_LONG_LONG) value); -#endif - } - } - { - int one = 1; int little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&value; -#if !CYTHON_COMPILING_IN_LIMITED_API - return _PyLong_FromByteArray(bytes, sizeof(int64_t), - little, !is_unsigned); -#else - PyObject *from_bytes, *result = NULL; - PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; - from_bytes = PyObject_GetAttrString((PyObject*)&PyInt_Type, "from_bytes"); - if (!from_bytes) return NULL; - py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(int64_t)); - if (!py_bytes) goto limited_bad; - order_str = PyUnicode_FromString(little ? "little" : "big"); - if (!order_str) goto limited_bad; - arg_tuple = PyTuple_Pack(2, py_bytes, order_str); - if (!arg_tuple) goto limited_bad; - kwds = PyDict_New(); - if (!kwds) goto limited_bad; - if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(!is_unsigned ? Py_True : Py_False))) goto limited_bad; - result = PyObject_Call(from_bytes, arg_tuple, kwds); - limited_bad: - Py_XDECREF(from_bytes); - Py_XDECREF(py_bytes); - Py_XDECREF(order_str); - Py_XDECREF(arg_tuple); - Py_XDECREF(kwds); - return result; -#endif - } -} - -/* CIntToPy */ - static CYTHON_INLINE PyObject* __Pyx_PyInt_From_uint64_t(uint64_t value) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const uint64_t neg_one = (uint64_t) -1, const_zero = (uint64_t) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; - if (is_unsigned) { - if (sizeof(uint64_t) < sizeof(long)) { - return PyInt_FromLong((long) value); - } else if (sizeof(uint64_t) <= sizeof(unsigned long)) { - return PyLong_FromUnsignedLong((unsigned long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(uint64_t) <= sizeof(unsigned PY_LONG_LONG)) { - return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); -#endif - } - } else { - if (sizeof(uint64_t) <= sizeof(long)) { - return PyInt_FromLong((long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(uint64_t) <= sizeof(PY_LONG_LONG)) { - return PyLong_FromLongLong((PY_LONG_LONG) value); -#endif - } - } - { - int one = 1; int little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&value; -#if !CYTHON_COMPILING_IN_LIMITED_API - return _PyLong_FromByteArray(bytes, sizeof(uint64_t), - little, !is_unsigned); -#else - PyObject *from_bytes, *result = NULL; - PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; - from_bytes = PyObject_GetAttrString((PyObject*)&PyInt_Type, "from_bytes"); - if (!from_bytes) return NULL; - py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(uint64_t)); - if (!py_bytes) goto limited_bad; - order_str = PyUnicode_FromString(little ? "little" : "big"); - if (!order_str) goto limited_bad; - arg_tuple = PyTuple_Pack(2, py_bytes, order_str); - if (!arg_tuple) goto limited_bad; - kwds = PyDict_New(); - if (!kwds) goto limited_bad; - if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(!is_unsigned ? Py_True : Py_False))) goto limited_bad; - result = PyObject_Call(from_bytes, arg_tuple, kwds); - limited_bad: - Py_XDECREF(from_bytes); - Py_XDECREF(py_bytes); - Py_XDECREF(order_str); - Py_XDECREF(arg_tuple); - Py_XDECREF(kwds); - return result; -#endif - } -} - -/* CIntToPy */ - static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_char(unsigned char value) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const unsigned char neg_one = (unsigned char) -1, const_zero = (unsigned char) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; - if (is_unsigned) { - if (sizeof(unsigned char) < sizeof(long)) { - return PyInt_FromLong((long) value); - } else if (sizeof(unsigned char) <= sizeof(unsigned long)) { - return PyLong_FromUnsignedLong((unsigned long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(unsigned char) <= sizeof(unsigned PY_LONG_LONG)) { - return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); -#endif - } - } else { - if (sizeof(unsigned char) <= sizeof(long)) { - return PyInt_FromLong((long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(unsigned char) <= sizeof(PY_LONG_LONG)) { - return PyLong_FromLongLong((PY_LONG_LONG) value); -#endif - } - } - { - int one = 1; int little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&value; -#if !CYTHON_COMPILING_IN_LIMITED_API - return _PyLong_FromByteArray(bytes, sizeof(unsigned char), - little, !is_unsigned); -#else - PyObject *from_bytes, *result = NULL; - PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; - from_bytes = PyObject_GetAttrString((PyObject*)&PyInt_Type, "from_bytes"); - if (!from_bytes) return NULL; - py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(unsigned char)); - if (!py_bytes) goto limited_bad; - order_str = PyUnicode_FromString(little ? "little" : "big"); - if (!order_str) goto limited_bad; - arg_tuple = PyTuple_Pack(2, py_bytes, order_str); - if (!arg_tuple) goto limited_bad; - kwds = PyDict_New(); - if (!kwds) goto limited_bad; - if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(!is_unsigned ? Py_True : Py_False))) goto limited_bad; - result = PyObject_Call(from_bytes, arg_tuple, kwds); - limited_bad: - Py_XDECREF(from_bytes); - Py_XDECREF(py_bytes); - Py_XDECREF(order_str); - Py_XDECREF(arg_tuple); - Py_XDECREF(kwds); - return result; -#endif - } -} - -/* CIntFromPy */ - static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const long neg_one = (long) -1, const_zero = (long) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x))) { - if ((sizeof(long) < sizeof(long))) { - __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x)) - } else { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - goto raise_neg_overflow; - } - return (long) val; - } - } else -#endif - if (likely(PyLong_Check(x))) { - if (is_unsigned) { -#if CYTHON_USE_PYLONG_INTERNALS - if (unlikely(__Pyx_PyLong_IsNeg(x))) { - goto raise_neg_overflow; - } else if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_DigitCount(x)) { - case 2: - if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) >= 2 * PyLong_SHIFT)) { - return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); - } - } - break; - case 3: - if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) >= 3 * PyLong_SHIFT)) { - return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); - } - } - break; - case 4: - if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) >= 4 * PyLong_SHIFT)) { - return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); - } - } - break; - } - } -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 - if (unlikely(Py_SIZE(x) < 0)) { - goto raise_neg_overflow; - } -#else - { - int result = PyObject_RichCompareBool(x, Py_False, Py_LT); - if (unlikely(result < 0)) - return (long) -1; - if (unlikely(result == 1)) - goto raise_neg_overflow; - } -#endif - if ((sizeof(long) <= sizeof(unsigned long))) { - __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(long) <= sizeof(unsigned PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) -#endif - } - } else { -#if CYTHON_USE_PYLONG_INTERNALS - if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_SignedDigitCount(x)) { - case -2: - if ((8 * sizeof(long) - 1 > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { - return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case 2: - if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { - return (long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case -3: - if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { - return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case 3: - if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { - return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case -4: - if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 4 * PyLong_SHIFT)) { - return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - case 4: - if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(long) - 1 > 4 * PyLong_SHIFT)) { - return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); - } - } - break; - } - } -#endif - if ((sizeof(long) <= sizeof(long))) { - __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(long) <= sizeof(PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x)) -#endif - } - } - { - long val; - PyObject *v = __Pyx_PyNumber_IntOrLong(x); -#if PY_MAJOR_VERSION < 3 - if (likely(v) && !PyLong_Check(v)) { - PyObject *tmp = v; - v = PyNumber_Long(tmp); - Py_DECREF(tmp); - } -#endif - if (likely(v)) { - int ret = -1; -#if !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) - int one = 1; int is_little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&val; - ret = _PyLong_AsByteArray((PyLongObject *)v, - bytes, sizeof(val), - is_little, !is_unsigned); -#else - PyObject *stepval = NULL, *mask = NULL, *shift = NULL; - int bits, remaining_bits, is_negative = 0; - long idigit; - int chunk_size = (sizeof(long) < 8) ? 30 : 62; - if (unlikely(!PyLong_CheckExact(v))) { - PyObject *tmp = v; - v = PyNumber_Long(v); - assert(PyLong_CheckExact(v)); - Py_DECREF(tmp); - if (unlikely(!v)) return (long) -1; - } -#if CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030B0000 - if (Py_SIZE(x) == 0) - return (long) 0; - is_negative = Py_SIZE(x) < 0; -#else - { - int result = PyObject_RichCompareBool(x, Py_False, Py_LT); - if (unlikely(result < 0)) - return (long) -1; - is_negative = result == 1; - } -#endif - if (is_unsigned && unlikely(is_negative)) { - goto raise_neg_overflow; - } else if (is_negative) { - stepval = PyNumber_Invert(v); - if (unlikely(!stepval)) - return (long) -1; - } else { - stepval = __Pyx_NewRef(v); - } - val = (long) 0; - mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; - shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; - for (bits = 0; bits < (int) sizeof(long) * 8 - chunk_size; bits += chunk_size) { - PyObject *tmp, *digit; - digit = PyNumber_And(stepval, mask); - if (unlikely(!digit)) goto done; - idigit = PyLong_AsLong(digit); - Py_DECREF(digit); - if (unlikely(idigit < 0)) goto done; - tmp = PyNumber_Rshift(stepval, shift); - if (unlikely(!tmp)) goto done; - Py_DECREF(stepval); stepval = tmp; - val |= ((long) idigit) << bits; - #if CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030B0000 - if (Py_SIZE(stepval) == 0) - goto unpacking_done; - #endif - } - idigit = PyLong_AsLong(stepval); - if (unlikely(idigit < 0)) goto done; - remaining_bits = ((int) sizeof(long) * 8) - bits - (is_unsigned ? 0 : 1); - if (unlikely(idigit >= (1L << remaining_bits))) - goto raise_overflow; - val |= ((long) idigit) << bits; - #if CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030B0000 - unpacking_done: - #endif - if (!is_unsigned) { - if (unlikely(val & (((long) 1) << (sizeof(long) * 8 - 1)))) - goto raise_overflow; - if (is_negative) - val = ~val; - } - ret = 0; - done: - Py_XDECREF(shift); - Py_XDECREF(mask); - Py_XDECREF(stepval); -#endif - Py_DECREF(v); - if (likely(!ret)) - return val; - } - return (long) -1; - } - } else { - long val; - PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); - if (!tmp) return (long) -1; - val = __Pyx_PyInt_As_long(tmp); - Py_DECREF(tmp); - return val; - } -raise_overflow: - PyErr_SetString(PyExc_OverflowError, - "value too large to convert to long"); - return (long) -1; -raise_neg_overflow: - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to long"); - return (long) -1; -} - -/* CIntToPy */ - static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const int neg_one = (int) -1, const_zero = (int) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; - if (is_unsigned) { - if (sizeof(int) < sizeof(long)) { - return PyInt_FromLong((long) value); - } else if (sizeof(int) <= sizeof(unsigned long)) { - return PyLong_FromUnsignedLong((unsigned long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { - return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); -#endif - } - } else { - if (sizeof(int) <= sizeof(long)) { - return PyInt_FromLong((long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { - return PyLong_FromLongLong((PY_LONG_LONG) value); -#endif - } - } - { - int one = 1; int little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&value; -#if !CYTHON_COMPILING_IN_LIMITED_API - return _PyLong_FromByteArray(bytes, sizeof(int), - little, !is_unsigned); -#else - PyObject *from_bytes, *result = NULL; - PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; - from_bytes = PyObject_GetAttrString((PyObject*)&PyInt_Type, "from_bytes"); - if (!from_bytes) return NULL; - py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(int)); - if (!py_bytes) goto limited_bad; - order_str = PyUnicode_FromString(little ? "little" : "big"); - if (!order_str) goto limited_bad; - arg_tuple = PyTuple_Pack(2, py_bytes, order_str); - if (!arg_tuple) goto limited_bad; - kwds = PyDict_New(); - if (!kwds) goto limited_bad; - if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(!is_unsigned ? Py_True : Py_False))) goto limited_bad; - result = PyObject_Call(from_bytes, arg_tuple, kwds); - limited_bad: - Py_XDECREF(from_bytes); - Py_XDECREF(py_bytes); - Py_XDECREF(order_str); - Py_XDECREF(arg_tuple); - Py_XDECREF(kwds); - return result; -#endif - } -} - -/* CIntToPy */ - static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const long neg_one = (long) -1, const_zero = (long) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; - if (is_unsigned) { - if (sizeof(long) < sizeof(long)) { - return PyInt_FromLong((long) value); - } else if (sizeof(long) <= sizeof(unsigned long)) { - return PyLong_FromUnsignedLong((unsigned long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { - return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); -#endif - } - } else { - if (sizeof(long) <= sizeof(long)) { - return PyInt_FromLong((long) value); -#ifdef HAVE_LONG_LONG - } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { - return PyLong_FromLongLong((PY_LONG_LONG) value); -#endif - } - } - { - int one = 1; int little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&value; -#if !CYTHON_COMPILING_IN_LIMITED_API - return _PyLong_FromByteArray(bytes, sizeof(long), - little, !is_unsigned); -#else - PyObject *from_bytes, *result = NULL; - PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; - from_bytes = PyObject_GetAttrString((PyObject*)&PyInt_Type, "from_bytes"); - if (!from_bytes) return NULL; - py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(long)); - if (!py_bytes) goto limited_bad; - order_str = PyUnicode_FromString(little ? "little" : "big"); - if (!order_str) goto limited_bad; - arg_tuple = PyTuple_Pack(2, py_bytes, order_str); - if (!arg_tuple) goto limited_bad; - kwds = PyDict_New(); - if (!kwds) goto limited_bad; - if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(!is_unsigned ? Py_True : Py_False))) goto limited_bad; - result = PyObject_Call(from_bytes, arg_tuple, kwds); - limited_bad: - Py_XDECREF(from_bytes); - Py_XDECREF(py_bytes); - Py_XDECREF(order_str); - Py_XDECREF(arg_tuple); - Py_XDECREF(kwds); - return result; -#endif - } -} - -/* CIntFromPy */ - static CYTHON_INLINE char __Pyx_PyInt_As_char(PyObject *x) { -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#endif - const char neg_one = (char) -1, const_zero = (char) 0; -#ifdef __Pyx_HAS_GCC_DIAGNOSTIC -#pragma GCC diagnostic pop -#endif - const int is_unsigned = neg_one > const_zero; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x))) { - if ((sizeof(char) < sizeof(long))) { - __PYX_VERIFY_RETURN_INT(char, long, PyInt_AS_LONG(x)) - } else { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - goto raise_neg_overflow; - } - return (char) val; - } - } else -#endif - if (likely(PyLong_Check(x))) { - if (is_unsigned) { -#if CYTHON_USE_PYLONG_INTERNALS - if (unlikely(__Pyx_PyLong_IsNeg(x))) { - goto raise_neg_overflow; - } else if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(char, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_DigitCount(x)) { - case 2: - if ((8 * sizeof(char) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) >= 2 * PyLong_SHIFT)) { - return (char) (((((char)digits[1]) << PyLong_SHIFT) | (char)digits[0])); - } - } - break; - case 3: - if ((8 * sizeof(char) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) >= 3 * PyLong_SHIFT)) { - return (char) (((((((char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0])); - } - } - break; - case 4: - if ((8 * sizeof(char) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) >= 4 * PyLong_SHIFT)) { - return (char) (((((((((char)digits[3]) << PyLong_SHIFT) | (char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0])); - } - } - break; - } - } -#endif -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 - if (unlikely(Py_SIZE(x) < 0)) { - goto raise_neg_overflow; - } -#else - { - int result = PyObject_RichCompareBool(x, Py_False, Py_LT); - if (unlikely(result < 0)) - return (char) -1; - if (unlikely(result == 1)) - goto raise_neg_overflow; - } -#endif - if ((sizeof(char) <= sizeof(unsigned long))) { - __PYX_VERIFY_RETURN_INT_EXC(char, unsigned long, PyLong_AsUnsignedLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(char) <= sizeof(unsigned PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(char, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) -#endif - } - } else { -#if CYTHON_USE_PYLONG_INTERNALS - if (__Pyx_PyLong_IsCompact(x)) { - __PYX_VERIFY_RETURN_INT(char, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) - } else { - const digit* digits = __Pyx_PyLong_Digits(x); - assert(__Pyx_PyLong_DigitCount(x) > 1); - switch (__Pyx_PyLong_SignedDigitCount(x)) { - case -2: - if ((8 * sizeof(char) - 1 > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) - 1 > 2 * PyLong_SHIFT)) { - return (char) (((char)-1)*(((((char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); - } - } - break; - case 2: - if ((8 * sizeof(char) > 1 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) - 1 > 2 * PyLong_SHIFT)) { - return (char) ((((((char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); - } - } - break; - case -3: - if ((8 * sizeof(char) - 1 > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) - 1 > 3 * PyLong_SHIFT)) { - return (char) (((char)-1)*(((((((char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); - } - } - break; - case 3: - if ((8 * sizeof(char) > 2 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) - 1 > 3 * PyLong_SHIFT)) { - return (char) ((((((((char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); - } - } - break; - case -4: - if ((8 * sizeof(char) - 1 > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) - 1 > 4 * PyLong_SHIFT)) { - return (char) (((char)-1)*(((((((((char)digits[3]) << PyLong_SHIFT) | (char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); - } - } - break; - case 4: - if ((8 * sizeof(char) > 3 * PyLong_SHIFT)) { - if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { - __PYX_VERIFY_RETURN_INT(char, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) - } else if ((8 * sizeof(char) - 1 > 4 * PyLong_SHIFT)) { - return (char) ((((((((((char)digits[3]) << PyLong_SHIFT) | (char)digits[2]) << PyLong_SHIFT) | (char)digits[1]) << PyLong_SHIFT) | (char)digits[0]))); - } - } - break; - } - } -#endif - if ((sizeof(char) <= sizeof(long))) { - __PYX_VERIFY_RETURN_INT_EXC(char, long, PyLong_AsLong(x)) -#ifdef HAVE_LONG_LONG - } else if ((sizeof(char) <= sizeof(PY_LONG_LONG))) { - __PYX_VERIFY_RETURN_INT_EXC(char, PY_LONG_LONG, PyLong_AsLongLong(x)) -#endif - } - } - { - char val; - PyObject *v = __Pyx_PyNumber_IntOrLong(x); -#if PY_MAJOR_VERSION < 3 - if (likely(v) && !PyLong_Check(v)) { - PyObject *tmp = v; - v = PyNumber_Long(tmp); - Py_DECREF(tmp); - } -#endif - if (likely(v)) { - int ret = -1; -#if !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) - int one = 1; int is_little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&val; - ret = _PyLong_AsByteArray((PyLongObject *)v, - bytes, sizeof(val), - is_little, !is_unsigned); -#else - PyObject *stepval = NULL, *mask = NULL, *shift = NULL; - int bits, remaining_bits, is_negative = 0; - long idigit; - int chunk_size = (sizeof(long) < 8) ? 30 : 62; - if (unlikely(!PyLong_CheckExact(v))) { - PyObject *tmp = v; - v = PyNumber_Long(v); - assert(PyLong_CheckExact(v)); - Py_DECREF(tmp); - if (unlikely(!v)) return (char) -1; - } -#if CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030B0000 - if (Py_SIZE(x) == 0) - return (char) 0; - is_negative = Py_SIZE(x) < 0; -#else - { - int result = PyObject_RichCompareBool(x, Py_False, Py_LT); - if (unlikely(result < 0)) - return (char) -1; - is_negative = result == 1; - } -#endif - if (is_unsigned && unlikely(is_negative)) { - goto raise_neg_overflow; - } else if (is_negative) { - stepval = PyNumber_Invert(v); - if (unlikely(!stepval)) - return (char) -1; - } else { - stepval = __Pyx_NewRef(v); - } - val = (char) 0; - mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; - shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; - for (bits = 0; bits < (int) sizeof(char) * 8 - chunk_size; bits += chunk_size) { - PyObject *tmp, *digit; - digit = PyNumber_And(stepval, mask); - if (unlikely(!digit)) goto done; - idigit = PyLong_AsLong(digit); - Py_DECREF(digit); - if (unlikely(idigit < 0)) goto done; - tmp = PyNumber_Rshift(stepval, shift); - if (unlikely(!tmp)) goto done; - Py_DECREF(stepval); stepval = tmp; - val |= ((char) idigit) << bits; - #if CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030B0000 - if (Py_SIZE(stepval) == 0) - goto unpacking_done; - #endif - } - idigit = PyLong_AsLong(stepval); - if (unlikely(idigit < 0)) goto done; - remaining_bits = ((int) sizeof(char) * 8) - bits - (is_unsigned ? 0 : 1); - if (unlikely(idigit >= (1L << remaining_bits))) - goto raise_overflow; - val |= ((char) idigit) << bits; - #if CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030B0000 - unpacking_done: - #endif - if (!is_unsigned) { - if (unlikely(val & (((char) 1) << (sizeof(char) * 8 - 1)))) - goto raise_overflow; - if (is_negative) - val = ~val; - } - ret = 0; - done: - Py_XDECREF(shift); - Py_XDECREF(mask); - Py_XDECREF(stepval); -#endif - Py_DECREF(v); - if (likely(!ret)) - return val; - } - return (char) -1; - } - } else { - char val; - PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); - if (!tmp) return (char) -1; - val = __Pyx_PyInt_As_char(tmp); - Py_DECREF(tmp); - return val; - } -raise_overflow: - PyErr_SetString(PyExc_OverflowError, - "value too large to convert to char"); - return (char) -1; -raise_neg_overflow: - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to char"); - return (char) -1; -} - -/* FormatTypeName */ - #if CYTHON_COMPILING_IN_LIMITED_API -static __Pyx_TypeName -__Pyx_PyType_GetName(PyTypeObject* tp) -{ - PyObject *name = __Pyx_PyObject_GetAttrStr((PyObject *)tp, - __pyx_n_s_name_2); - if (unlikely(name == NULL) || unlikely(!PyUnicode_Check(name))) { - PyErr_Clear(); - Py_XDECREF(name); - name = __Pyx_NewRef(__pyx_n_s__59); - } - return name; -} -#endif - -/* PyObjectCall2Args */ - static CYTHON_INLINE PyObject* __Pyx_PyObject_Call2Args(PyObject* function, PyObject* arg1, PyObject* arg2) { - PyObject *args[3] = {NULL, arg1, arg2}; - return __Pyx_PyObject_FastCall(function, args+1, 2 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); -} - -/* PyObjectCallMethod1 */ - static PyObject* __Pyx__PyObject_CallMethod1(PyObject* method, PyObject* arg) { - PyObject *result = __Pyx_PyObject_CallOneArg(method, arg); - Py_DECREF(method); - return result; -} -static PyObject* __Pyx_PyObject_CallMethod1(PyObject* obj, PyObject* method_name, PyObject* arg) { - PyObject *method = NULL, *result; - int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method); - if (likely(is_method)) { - result = __Pyx_PyObject_Call2Args(method, obj, arg); - Py_DECREF(method); - return result; - } - if (unlikely(!method)) return NULL; - return __Pyx__PyObject_CallMethod1(method, arg); -} - -/* CoroutineBase */ - #include -#if PY_VERSION_HEX >= 0x030b00a6 - #ifndef Py_BUILD_CORE - #define Py_BUILD_CORE 1 - #endif - #include "internal/pycore_frame.h" -#endif -#define __Pyx_Coroutine_Undelegate(gen) Py_CLEAR((gen)->yieldfrom) -static int __Pyx_PyGen__FetchStopIterationValue(PyThreadState *__pyx_tstate, PyObject **pvalue) { - PyObject *et, *ev, *tb; - PyObject *value = NULL; - CYTHON_UNUSED_VAR(__pyx_tstate); - __Pyx_ErrFetch(&et, &ev, &tb); - if (!et) { - Py_XDECREF(tb); - Py_XDECREF(ev); - Py_INCREF(Py_None); - *pvalue = Py_None; - return 0; - } - if (likely(et == PyExc_StopIteration)) { - if (!ev) { - Py_INCREF(Py_None); - value = Py_None; - } -#if PY_VERSION_HEX >= 0x030300A0 - else if (likely(__Pyx_IS_TYPE(ev, (PyTypeObject*)PyExc_StopIteration))) { - value = ((PyStopIterationObject *)ev)->value; - Py_INCREF(value); - Py_DECREF(ev); - } -#endif - else if (unlikely(PyTuple_Check(ev))) { - if (PyTuple_GET_SIZE(ev) >= 1) { -#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - value = PyTuple_GET_ITEM(ev, 0); - Py_INCREF(value); -#else - value = PySequence_ITEM(ev, 0); -#endif - } else { - Py_INCREF(Py_None); - value = Py_None; - } - Py_DECREF(ev); - } - else if (!__Pyx_TypeCheck(ev, (PyTypeObject*)PyExc_StopIteration)) { - value = ev; - } - if (likely(value)) { - Py_XDECREF(tb); - Py_DECREF(et); - *pvalue = value; - return 0; - } - } else if (!__Pyx_PyErr_GivenExceptionMatches(et, PyExc_StopIteration)) { - __Pyx_ErrRestore(et, ev, tb); - return -1; - } - PyErr_NormalizeException(&et, &ev, &tb); - if (unlikely(!PyObject_TypeCheck(ev, (PyTypeObject*)PyExc_StopIteration))) { - __Pyx_ErrRestore(et, ev, tb); - return -1; - } - Py_XDECREF(tb); - Py_DECREF(et); -#if PY_VERSION_HEX >= 0x030300A0 - value = ((PyStopIterationObject *)ev)->value; - Py_INCREF(value); - Py_DECREF(ev); -#else - { - PyObject* args = __Pyx_PyObject_GetAttrStr(ev, __pyx_n_s_args); - Py_DECREF(ev); - if (likely(args)) { - value = PySequence_GetItem(args, 0); - Py_DECREF(args); - } - if (unlikely(!value)) { - __Pyx_ErrRestore(NULL, NULL, NULL); - Py_INCREF(Py_None); - value = Py_None; - } - } -#endif - *pvalue = value; - return 0; -} -static CYTHON_INLINE -void __Pyx_Coroutine_ExceptionClear(__Pyx_ExcInfoStruct *exc_state) { -#if PY_VERSION_HEX >= 0x030B00a4 - Py_CLEAR(exc_state->exc_value); -#else - PyObject *t, *v, *tb; - t = exc_state->exc_type; - v = exc_state->exc_value; - tb = exc_state->exc_traceback; - exc_state->exc_type = NULL; - exc_state->exc_value = NULL; - exc_state->exc_traceback = NULL; - Py_XDECREF(t); - Py_XDECREF(v); - Py_XDECREF(tb); -#endif -} -#define __Pyx_Coroutine_AlreadyRunningError(gen) (__Pyx__Coroutine_AlreadyRunningError(gen), (PyObject*)NULL) -static void __Pyx__Coroutine_AlreadyRunningError(__pyx_CoroutineObject *gen) { - const char *msg; - CYTHON_MAYBE_UNUSED_VAR(gen); - if ((0)) { - #ifdef __Pyx_Coroutine_USED - } else if (__Pyx_Coroutine_Check((PyObject*)gen)) { - msg = "coroutine already executing"; - #endif - #ifdef __Pyx_AsyncGen_USED - } else if (__Pyx_AsyncGen_CheckExact((PyObject*)gen)) { - msg = "async generator already executing"; - #endif - } else { - msg = "generator already executing"; - } - PyErr_SetString(PyExc_ValueError, msg); -} -#define __Pyx_Coroutine_NotStartedError(gen) (__Pyx__Coroutine_NotStartedError(gen), (PyObject*)NULL) -static void __Pyx__Coroutine_NotStartedError(PyObject *gen) { - const char *msg; - CYTHON_MAYBE_UNUSED_VAR(gen); - if ((0)) { - #ifdef __Pyx_Coroutine_USED - } else if (__Pyx_Coroutine_Check(gen)) { - msg = "can't send non-None value to a just-started coroutine"; - #endif - #ifdef __Pyx_AsyncGen_USED - } else if (__Pyx_AsyncGen_CheckExact(gen)) { - msg = "can't send non-None value to a just-started async generator"; - #endif - } else { - msg = "can't send non-None value to a just-started generator"; - } - PyErr_SetString(PyExc_TypeError, msg); -} -#define __Pyx_Coroutine_AlreadyTerminatedError(gen, value, closing) (__Pyx__Coroutine_AlreadyTerminatedError(gen, value, closing), (PyObject*)NULL) -static void __Pyx__Coroutine_AlreadyTerminatedError(PyObject *gen, PyObject *value, int closing) { - CYTHON_MAYBE_UNUSED_VAR(gen); - CYTHON_MAYBE_UNUSED_VAR(closing); - #ifdef __Pyx_Coroutine_USED - if (!closing && __Pyx_Coroutine_Check(gen)) { - PyErr_SetString(PyExc_RuntimeError, "cannot reuse already awaited coroutine"); - } else - #endif - if (value) { - #ifdef __Pyx_AsyncGen_USED - if (__Pyx_AsyncGen_CheckExact(gen)) - PyErr_SetNone(__Pyx_PyExc_StopAsyncIteration); - else - #endif - PyErr_SetNone(PyExc_StopIteration); - } -} -static -PyObject *__Pyx_Coroutine_SendEx(__pyx_CoroutineObject *self, PyObject *value, int closing) { - __Pyx_PyThreadState_declare - PyThreadState *tstate; - __Pyx_ExcInfoStruct *exc_state; - PyObject *retval; - assert(!self->is_running); - if (unlikely(self->resume_label == 0)) { - if (unlikely(value && value != Py_None)) { - return __Pyx_Coroutine_NotStartedError((PyObject*)self); - } - } - if (unlikely(self->resume_label == -1)) { - return __Pyx_Coroutine_AlreadyTerminatedError((PyObject*)self, value, closing); - } -#if CYTHON_FAST_THREAD_STATE - __Pyx_PyThreadState_assign - tstate = __pyx_tstate; -#else - tstate = __Pyx_PyThreadState_Current; -#endif - exc_state = &self->gi_exc_state; - if (exc_state->exc_value) { - #if CYTHON_COMPILING_IN_PYPY - #else - PyObject *exc_tb; - #if PY_VERSION_HEX >= 0x030B00a4 && !CYTHON_COMPILING_IN_CPYTHON - exc_tb = PyException_GetTraceback(exc_state->exc_value); - #elif PY_VERSION_HEX >= 0x030B00a4 - exc_tb = ((PyBaseExceptionObject*) exc_state->exc_value)->traceback; - #else - exc_tb = exc_state->exc_traceback; - #endif - if (exc_tb) { - PyTracebackObject *tb = (PyTracebackObject *) exc_tb; - PyFrameObject *f = tb->tb_frame; - assert(f->f_back == NULL); - #if PY_VERSION_HEX >= 0x030B00A1 - f->f_back = PyThreadState_GetFrame(tstate); - #else - Py_XINCREF(tstate->frame); - f->f_back = tstate->frame; - #endif - #if PY_VERSION_HEX >= 0x030B00a4 && !CYTHON_COMPILING_IN_CPYTHON - Py_DECREF(exc_tb); - #endif - } - #endif - } -#if CYTHON_USE_EXC_INFO_STACK - exc_state->previous_item = tstate->exc_info; - tstate->exc_info = exc_state; -#else - if (exc_state->exc_type) { - __Pyx_ExceptionSwap(&exc_state->exc_type, &exc_state->exc_value, &exc_state->exc_traceback); - } else { - __Pyx_Coroutine_ExceptionClear(exc_state); - __Pyx_ExceptionSave(&exc_state->exc_type, &exc_state->exc_value, &exc_state->exc_traceback); - } -#endif - self->is_running = 1; - retval = self->body(self, tstate, value); - self->is_running = 0; -#if CYTHON_USE_EXC_INFO_STACK - exc_state = &self->gi_exc_state; - tstate->exc_info = exc_state->previous_item; - exc_state->previous_item = NULL; - __Pyx_Coroutine_ResetFrameBackpointer(exc_state); -#endif - return retval; -} -static CYTHON_INLINE void __Pyx_Coroutine_ResetFrameBackpointer(__Pyx_ExcInfoStruct *exc_state) { -#if CYTHON_COMPILING_IN_PYPY - CYTHON_UNUSED_VAR(exc_state); -#else - PyObject *exc_tb; - #if PY_VERSION_HEX >= 0x030B00a4 - if (!exc_state->exc_value) return; - exc_tb = PyException_GetTraceback(exc_state->exc_value); - #else - exc_tb = exc_state->exc_traceback; - #endif - if (likely(exc_tb)) { - PyTracebackObject *tb = (PyTracebackObject *) exc_tb; - PyFrameObject *f = tb->tb_frame; - Py_CLEAR(f->f_back); - #if PY_VERSION_HEX >= 0x030B00a4 - Py_DECREF(exc_tb); - #endif - } -#endif -} -static CYTHON_INLINE -PyObject *__Pyx_Coroutine_MethodReturn(PyObject* gen, PyObject *retval) { - CYTHON_MAYBE_UNUSED_VAR(gen); - if (unlikely(!retval)) { - __Pyx_PyThreadState_declare - __Pyx_PyThreadState_assign - if (!__Pyx_PyErr_Occurred()) { - PyObject *exc = PyExc_StopIteration; - #ifdef __Pyx_AsyncGen_USED - if (__Pyx_AsyncGen_CheckExact(gen)) - exc = __Pyx_PyExc_StopAsyncIteration; - #endif - __Pyx_PyErr_SetNone(exc); - } - } - return retval; -} -#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03030000 && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) -static CYTHON_INLINE -PyObject *__Pyx_PyGen_Send(PyGenObject *gen, PyObject *arg) { -#if PY_VERSION_HEX <= 0x030A00A1 - return _PyGen_Send(gen, arg); -#else - PyObject *result; - if (PyIter_Send((PyObject*)gen, arg ? arg : Py_None, &result) == PYGEN_RETURN) { - if (PyAsyncGen_CheckExact(gen)) { - assert(result == Py_None); - PyErr_SetNone(PyExc_StopAsyncIteration); - } - else if (result == Py_None) { - PyErr_SetNone(PyExc_StopIteration); - } - else { - _PyGen_SetStopIterationValue(result); - } - Py_CLEAR(result); - } - return result; -#endif -} -#endif -static CYTHON_INLINE -PyObject *__Pyx_Coroutine_FinishDelegation(__pyx_CoroutineObject *gen) { - PyObject *ret; - PyObject *val = NULL; - __Pyx_Coroutine_Undelegate(gen); - __Pyx_PyGen__FetchStopIterationValue(__Pyx_PyThreadState_Current, &val); - ret = __Pyx_Coroutine_SendEx(gen, val, 0); - Py_XDECREF(val); - return ret; -} -static PyObject *__Pyx_Coroutine_Send(PyObject *self, PyObject *value) { - PyObject *retval; - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject*) self; - PyObject *yf = gen->yieldfrom; - if (unlikely(gen->is_running)) - return __Pyx_Coroutine_AlreadyRunningError(gen); - if (yf) { - PyObject *ret; - gen->is_running = 1; - #ifdef __Pyx_Generator_USED - if (__Pyx_Generator_CheckExact(yf)) { - ret = __Pyx_Coroutine_Send(yf, value); - } else - #endif - #ifdef __Pyx_Coroutine_USED - if (__Pyx_Coroutine_Check(yf)) { - ret = __Pyx_Coroutine_Send(yf, value); - } else - #endif - #ifdef __Pyx_AsyncGen_USED - if (__pyx_PyAsyncGenASend_CheckExact(yf)) { - ret = __Pyx_async_gen_asend_send(yf, value); - } else - #endif - #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03030000 && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) - if (PyGen_CheckExact(yf)) { - ret = __Pyx_PyGen_Send((PyGenObject*)yf, value == Py_None ? NULL : value); - } else - #endif - #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03050000 && defined(PyCoro_CheckExact) && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) - if (PyCoro_CheckExact(yf)) { - ret = __Pyx_PyGen_Send((PyGenObject*)yf, value == Py_None ? NULL : value); - } else - #endif - { - if (value == Py_None) - ret = __Pyx_PyObject_GetIterNextFunc(yf)(yf); - else - ret = __Pyx_PyObject_CallMethod1(yf, __pyx_n_s_send, value); - } - gen->is_running = 0; - if (likely(ret)) { - return ret; - } - retval = __Pyx_Coroutine_FinishDelegation(gen); - } else { - retval = __Pyx_Coroutine_SendEx(gen, value, 0); - } - return __Pyx_Coroutine_MethodReturn(self, retval); -} -static int __Pyx_Coroutine_CloseIter(__pyx_CoroutineObject *gen, PyObject *yf) { - PyObject *retval = NULL; - int err = 0; - #ifdef __Pyx_Generator_USED - if (__Pyx_Generator_CheckExact(yf)) { - retval = __Pyx_Coroutine_Close(yf); - if (!retval) - return -1; - } else - #endif - #ifdef __Pyx_Coroutine_USED - if (__Pyx_Coroutine_Check(yf)) { - retval = __Pyx_Coroutine_Close(yf); - if (!retval) - return -1; - } else - if (__Pyx_CoroutineAwait_CheckExact(yf)) { - retval = __Pyx_CoroutineAwait_Close((__pyx_CoroutineAwaitObject*)yf, NULL); - if (!retval) - return -1; - } else - #endif - #ifdef __Pyx_AsyncGen_USED - if (__pyx_PyAsyncGenASend_CheckExact(yf)) { - retval = __Pyx_async_gen_asend_close(yf, NULL); - } else - if (__pyx_PyAsyncGenAThrow_CheckExact(yf)) { - retval = __Pyx_async_gen_athrow_close(yf, NULL); - } else - #endif - { - PyObject *meth; - gen->is_running = 1; - meth = __Pyx_PyObject_GetAttrStrNoError(yf, __pyx_n_s_close); - if (unlikely(!meth)) { - if (unlikely(PyErr_Occurred())) { - PyErr_WriteUnraisable(yf); - } - } else { - retval = __Pyx_PyObject_CallNoArg(meth); - Py_DECREF(meth); - if (unlikely(!retval)) - err = -1; - } - gen->is_running = 0; - } - Py_XDECREF(retval); - return err; -} -static PyObject *__Pyx_Generator_Next(PyObject *self) { - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject*) self; - PyObject *yf = gen->yieldfrom; - if (unlikely(gen->is_running)) - return __Pyx_Coroutine_AlreadyRunningError(gen); - if (yf) { - PyObject *ret; - gen->is_running = 1; - #ifdef __Pyx_Generator_USED - if (__Pyx_Generator_CheckExact(yf)) { - ret = __Pyx_Generator_Next(yf); - } else - #endif - #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03030000 && (defined(__linux__) || PY_VERSION_HEX >= 0x030600B3) - if (PyGen_CheckExact(yf)) { - ret = __Pyx_PyGen_Send((PyGenObject*)yf, NULL); - } else - #endif - #ifdef __Pyx_Coroutine_USED - if (__Pyx_Coroutine_Check(yf)) { - ret = __Pyx_Coroutine_Send(yf, Py_None); - } else - #endif - ret = __Pyx_PyObject_GetIterNextFunc(yf)(yf); - gen->is_running = 0; - if (likely(ret)) { - return ret; - } - return __Pyx_Coroutine_FinishDelegation(gen); - } - return __Pyx_Coroutine_SendEx(gen, Py_None, 0); -} -static PyObject *__Pyx_Coroutine_Close_Method(PyObject *self, PyObject *arg) { - CYTHON_UNUSED_VAR(arg); - return __Pyx_Coroutine_Close(self); -} -static PyObject *__Pyx_Coroutine_Close(PyObject *self) { - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; - PyObject *retval, *raised_exception; - PyObject *yf = gen->yieldfrom; - int err = 0; - if (unlikely(gen->is_running)) - return __Pyx_Coroutine_AlreadyRunningError(gen); - if (yf) { - Py_INCREF(yf); - err = __Pyx_Coroutine_CloseIter(gen, yf); - __Pyx_Coroutine_Undelegate(gen); - Py_DECREF(yf); - } - if (err == 0) - PyErr_SetNone(PyExc_GeneratorExit); - retval = __Pyx_Coroutine_SendEx(gen, NULL, 1); - if (unlikely(retval)) { - const char *msg; - Py_DECREF(retval); - if ((0)) { - #ifdef __Pyx_Coroutine_USED - } else if (__Pyx_Coroutine_Check(self)) { - msg = "coroutine ignored GeneratorExit"; - #endif - #ifdef __Pyx_AsyncGen_USED - } else if (__Pyx_AsyncGen_CheckExact(self)) { -#if PY_VERSION_HEX < 0x03060000 - msg = "async generator ignored GeneratorExit - might require Python 3.6+ finalisation (PEP 525)"; -#else - msg = "async generator ignored GeneratorExit"; -#endif - #endif - } else { - msg = "generator ignored GeneratorExit"; - } - PyErr_SetString(PyExc_RuntimeError, msg); - return NULL; - } - raised_exception = PyErr_Occurred(); - if (likely(!raised_exception || __Pyx_PyErr_GivenExceptionMatches2(raised_exception, PyExc_GeneratorExit, PyExc_StopIteration))) { - if (raised_exception) PyErr_Clear(); - Py_INCREF(Py_None); - return Py_None; - } - return NULL; -} -static PyObject *__Pyx__Coroutine_Throw(PyObject *self, PyObject *typ, PyObject *val, PyObject *tb, - PyObject *args, int close_on_genexit) { - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; - PyObject *yf = gen->yieldfrom; - if (unlikely(gen->is_running)) - return __Pyx_Coroutine_AlreadyRunningError(gen); - if (yf) { - PyObject *ret; - Py_INCREF(yf); - if (__Pyx_PyErr_GivenExceptionMatches(typ, PyExc_GeneratorExit) && close_on_genexit) { - int err = __Pyx_Coroutine_CloseIter(gen, yf); - Py_DECREF(yf); - __Pyx_Coroutine_Undelegate(gen); - if (err < 0) - return __Pyx_Coroutine_MethodReturn(self, __Pyx_Coroutine_SendEx(gen, NULL, 0)); - goto throw_here; - } - gen->is_running = 1; - if (0 - #ifdef __Pyx_Generator_USED - || __Pyx_Generator_CheckExact(yf) - #endif - #ifdef __Pyx_Coroutine_USED - || __Pyx_Coroutine_Check(yf) - #endif - ) { - ret = __Pyx__Coroutine_Throw(yf, typ, val, tb, args, close_on_genexit); - #ifdef __Pyx_Coroutine_USED - } else if (__Pyx_CoroutineAwait_CheckExact(yf)) { - ret = __Pyx__Coroutine_Throw(((__pyx_CoroutineAwaitObject*)yf)->coroutine, typ, val, tb, args, close_on_genexit); - #endif - } else { - PyObject *meth = __Pyx_PyObject_GetAttrStrNoError(yf, __pyx_n_s_throw); - if (unlikely(!meth)) { - Py_DECREF(yf); - if (unlikely(PyErr_Occurred())) { - gen->is_running = 0; - return NULL; - } - __Pyx_Coroutine_Undelegate(gen); - gen->is_running = 0; - goto throw_here; - } - if (likely(args)) { - ret = __Pyx_PyObject_Call(meth, args, NULL); - } else { - PyObject *cargs[4] = {NULL, typ, val, tb}; - ret = __Pyx_PyObject_FastCall(meth, cargs+1, 3 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); - } - Py_DECREF(meth); - } - gen->is_running = 0; - Py_DECREF(yf); - if (!ret) { - ret = __Pyx_Coroutine_FinishDelegation(gen); - } - return __Pyx_Coroutine_MethodReturn(self, ret); - } -throw_here: - __Pyx_Raise(typ, val, tb, NULL); - return __Pyx_Coroutine_MethodReturn(self, __Pyx_Coroutine_SendEx(gen, NULL, 0)); -} -static PyObject *__Pyx_Coroutine_Throw(PyObject *self, PyObject *args) { - PyObject *typ; - PyObject *val = NULL; - PyObject *tb = NULL; - if (unlikely(!PyArg_UnpackTuple(args, (char *)"throw", 1, 3, &typ, &val, &tb))) - return NULL; - return __Pyx__Coroutine_Throw(self, typ, val, tb, args, 1); -} -static CYTHON_INLINE int __Pyx_Coroutine_traverse_excstate(__Pyx_ExcInfoStruct *exc_state, visitproc visit, void *arg) { -#if PY_VERSION_HEX >= 0x030B00a4 - Py_VISIT(exc_state->exc_value); -#else - Py_VISIT(exc_state->exc_type); - Py_VISIT(exc_state->exc_value); - Py_VISIT(exc_state->exc_traceback); -#endif - return 0; -} -static int __Pyx_Coroutine_traverse(__pyx_CoroutineObject *gen, visitproc visit, void *arg) { - Py_VISIT(gen->closure); - Py_VISIT(gen->classobj); - Py_VISIT(gen->yieldfrom); - return __Pyx_Coroutine_traverse_excstate(&gen->gi_exc_state, visit, arg); -} -static int __Pyx_Coroutine_clear(PyObject *self) { - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; - Py_CLEAR(gen->closure); - Py_CLEAR(gen->classobj); - Py_CLEAR(gen->yieldfrom); - __Pyx_Coroutine_ExceptionClear(&gen->gi_exc_state); -#ifdef __Pyx_AsyncGen_USED - if (__Pyx_AsyncGen_CheckExact(self)) { - Py_CLEAR(((__pyx_PyAsyncGenObject*)gen)->ag_finalizer); - } -#endif - Py_CLEAR(gen->gi_code); - Py_CLEAR(gen->gi_frame); - Py_CLEAR(gen->gi_name); - Py_CLEAR(gen->gi_qualname); - Py_CLEAR(gen->gi_modulename); - return 0; -} -static void __Pyx_Coroutine_dealloc(PyObject *self) { - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; - PyObject_GC_UnTrack(gen); - if (gen->gi_weakreflist != NULL) - PyObject_ClearWeakRefs(self); - if (gen->resume_label >= 0) { - PyObject_GC_Track(self); -#if PY_VERSION_HEX >= 0x030400a1 && CYTHON_USE_TP_FINALIZE - if (unlikely(PyObject_CallFinalizerFromDealloc(self))) -#else - Py_TYPE(gen)->tp_del(self); - if (unlikely(Py_REFCNT(self) > 0)) -#endif - { - return; - } - PyObject_GC_UnTrack(self); - } -#ifdef __Pyx_AsyncGen_USED - if (__Pyx_AsyncGen_CheckExact(self)) { - /* We have to handle this case for asynchronous generators - right here, because this code has to be between UNTRACK - and GC_Del. */ - Py_CLEAR(((__pyx_PyAsyncGenObject*)self)->ag_finalizer); - } -#endif - __Pyx_Coroutine_clear(self); - __Pyx_PyHeapTypeObject_GC_Del(gen); -} -static void __Pyx_Coroutine_del(PyObject *self) { - PyObject *error_type, *error_value, *error_traceback; - __pyx_CoroutineObject *gen = (__pyx_CoroutineObject *) self; - __Pyx_PyThreadState_declare - if (gen->resume_label < 0) { - return; - } -#if !CYTHON_USE_TP_FINALIZE - assert(self->ob_refcnt == 0); - __Pyx_SET_REFCNT(self, 1); -#endif - __Pyx_PyThreadState_assign - __Pyx_ErrFetch(&error_type, &error_value, &error_traceback); -#ifdef __Pyx_AsyncGen_USED - if (__Pyx_AsyncGen_CheckExact(self)) { - __pyx_PyAsyncGenObject *agen = (__pyx_PyAsyncGenObject*)self; - PyObject *finalizer = agen->ag_finalizer; - if (finalizer && !agen->ag_closed) { - PyObject *res = __Pyx_PyObject_CallOneArg(finalizer, self); - if (unlikely(!res)) { - PyErr_WriteUnraisable(self); - } else { - Py_DECREF(res); - } - __Pyx_ErrRestore(error_type, error_value, error_traceback); - return; - } - } -#endif - if (unlikely(gen->resume_label == 0 && !error_value)) { -#ifdef __Pyx_Coroutine_USED -#ifdef __Pyx_Generator_USED - if (!__Pyx_Generator_CheckExact(self)) -#endif - { - PyObject_GC_UnTrack(self); -#if PY_MAJOR_VERSION >= 3 || defined(PyErr_WarnFormat) - if (unlikely(PyErr_WarnFormat(PyExc_RuntimeWarning, 1, "coroutine '%.50S' was never awaited", gen->gi_qualname) < 0)) - PyErr_WriteUnraisable(self); -#else - {PyObject *msg; - char *cmsg; - #if CYTHON_COMPILING_IN_PYPY - msg = NULL; - cmsg = (char*) "coroutine was never awaited"; - #else - char *cname; - PyObject *qualname; - qualname = gen->gi_qualname; - cname = PyString_AS_STRING(qualname); - msg = PyString_FromFormat("coroutine '%.50s' was never awaited", cname); - if (unlikely(!msg)) { - PyErr_Clear(); - cmsg = (char*) "coroutine was never awaited"; - } else { - cmsg = PyString_AS_STRING(msg); - } - #endif - if (unlikely(PyErr_WarnEx(PyExc_RuntimeWarning, cmsg, 1) < 0)) - PyErr_WriteUnraisable(self); - Py_XDECREF(msg);} -#endif - PyObject_GC_Track(self); - } -#endif - } else { - PyObject *res = __Pyx_Coroutine_Close(self); - if (unlikely(!res)) { - if (PyErr_Occurred()) - PyErr_WriteUnraisable(self); - } else { - Py_DECREF(res); - } - } - __Pyx_ErrRestore(error_type, error_value, error_traceback); -#if !CYTHON_USE_TP_FINALIZE - assert(Py_REFCNT(self) > 0); - if (likely(--self->ob_refcnt == 0)) { - return; - } - { - Py_ssize_t refcnt = Py_REFCNT(self); - _Py_NewReference(self); - __Pyx_SET_REFCNT(self, refcnt); - } -#if CYTHON_COMPILING_IN_CPYTHON - assert(PyType_IS_GC(Py_TYPE(self)) && - _Py_AS_GC(self)->gc.gc_refs != _PyGC_REFS_UNTRACKED); - _Py_DEC_REFTOTAL; -#endif -#ifdef COUNT_ALLOCS - --Py_TYPE(self)->tp_frees; - --Py_TYPE(self)->tp_allocs; -#endif -#endif -} -static PyObject * -__Pyx_Coroutine_get_name(__pyx_CoroutineObject *self, void *context) -{ - PyObject *name = self->gi_name; - CYTHON_UNUSED_VAR(context); - if (unlikely(!name)) name = Py_None; - Py_INCREF(name); - return name; -} -static int -__Pyx_Coroutine_set_name(__pyx_CoroutineObject *self, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); -#if PY_MAJOR_VERSION >= 3 - if (unlikely(value == NULL || !PyUnicode_Check(value))) -#else - if (unlikely(value == NULL || !PyString_Check(value))) -#endif - { - PyErr_SetString(PyExc_TypeError, - "__name__ must be set to a string object"); - return -1; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(self->gi_name, value); - return 0; -} -static PyObject * -__Pyx_Coroutine_get_qualname(__pyx_CoroutineObject *self, void *context) -{ - PyObject *name = self->gi_qualname; - CYTHON_UNUSED_VAR(context); - if (unlikely(!name)) name = Py_None; - Py_INCREF(name); - return name; -} -static int -__Pyx_Coroutine_set_qualname(__pyx_CoroutineObject *self, PyObject *value, void *context) -{ - CYTHON_UNUSED_VAR(context); -#if PY_MAJOR_VERSION >= 3 - if (unlikely(value == NULL || !PyUnicode_Check(value))) -#else - if (unlikely(value == NULL || !PyString_Check(value))) -#endif - { - PyErr_SetString(PyExc_TypeError, - "__qualname__ must be set to a string object"); - return -1; - } - Py_INCREF(value); - __Pyx_Py_XDECREF_SET(self->gi_qualname, value); - return 0; -} -static PyObject * -__Pyx_Coroutine_get_frame(__pyx_CoroutineObject *self, void *context) -{ - PyObject *frame = self->gi_frame; - CYTHON_UNUSED_VAR(context); - if (!frame) { - if (unlikely(!self->gi_code)) { - Py_RETURN_NONE; - } - frame = (PyObject *) PyFrame_New( - PyThreadState_Get(), /*PyThreadState *tstate,*/ - (PyCodeObject*) self->gi_code, /*PyCodeObject *code,*/ - __pyx_d, /*PyObject *globals,*/ - 0 /*PyObject *locals*/ - ); - if (unlikely(!frame)) - return NULL; - self->gi_frame = frame; - } - Py_INCREF(frame); - return frame; -} -static __pyx_CoroutineObject *__Pyx__Coroutine_New( - PyTypeObject* type, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, - PyObject *name, PyObject *qualname, PyObject *module_name) { - __pyx_CoroutineObject *gen = PyObject_GC_New(__pyx_CoroutineObject, type); - if (unlikely(!gen)) - return NULL; - return __Pyx__Coroutine_NewInit(gen, body, code, closure, name, qualname, module_name); -} -static __pyx_CoroutineObject *__Pyx__Coroutine_NewInit( - __pyx_CoroutineObject *gen, __pyx_coroutine_body_t body, PyObject *code, PyObject *closure, - PyObject *name, PyObject *qualname, PyObject *module_name) { - gen->body = body; - gen->closure = closure; - Py_XINCREF(closure); - gen->is_running = 0; - gen->resume_label = 0; - gen->classobj = NULL; - gen->yieldfrom = NULL; - #if PY_VERSION_HEX >= 0x030B00a4 - gen->gi_exc_state.exc_value = NULL; - #else - gen->gi_exc_state.exc_type = NULL; - gen->gi_exc_state.exc_value = NULL; - gen->gi_exc_state.exc_traceback = NULL; - #endif -#if CYTHON_USE_EXC_INFO_STACK - gen->gi_exc_state.previous_item = NULL; -#endif - gen->gi_weakreflist = NULL; - Py_XINCREF(qualname); - gen->gi_qualname = qualname; - Py_XINCREF(name); - gen->gi_name = name; - Py_XINCREF(module_name); - gen->gi_modulename = module_name; - Py_XINCREF(code); - gen->gi_code = code; - gen->gi_frame = NULL; - PyObject_GC_Track(gen); - return gen; -} - -/* PatchModuleWithCoroutine */ - static PyObject* __Pyx_Coroutine_patch_module(PyObject* module, const char* py_code) { -#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) - int result; - PyObject *globals, *result_obj; - globals = PyDict_New(); if (unlikely(!globals)) goto ignore; - result = PyDict_SetItemString(globals, "_cython_coroutine_type", - #ifdef __Pyx_Coroutine_USED - (PyObject*)__pyx_CoroutineType); - #else - Py_None); - #endif - if (unlikely(result < 0)) goto ignore; - result = PyDict_SetItemString(globals, "_cython_generator_type", - #ifdef __Pyx_Generator_USED - (PyObject*)__pyx_GeneratorType); - #else - Py_None); - #endif - if (unlikely(result < 0)) goto ignore; - if (unlikely(PyDict_SetItemString(globals, "_module", module) < 0)) goto ignore; - if (unlikely(PyDict_SetItemString(globals, "__builtins__", __pyx_b) < 0)) goto ignore; - result_obj = PyRun_String(py_code, Py_file_input, globals, globals); - if (unlikely(!result_obj)) goto ignore; - Py_DECREF(result_obj); - Py_DECREF(globals); - return module; -ignore: - Py_XDECREF(globals); - PyErr_WriteUnraisable(module); - if (unlikely(PyErr_WarnEx(PyExc_RuntimeWarning, "Cython module failed to patch module with custom type", 1) < 0)) { - Py_DECREF(module); - module = NULL; - } -#else - py_code++; -#endif - return module; -} - -/* PatchGeneratorABC */ - #ifndef CYTHON_REGISTER_ABCS -#define CYTHON_REGISTER_ABCS 1 -#endif -#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) -static PyObject* __Pyx_patch_abc_module(PyObject *module); -static PyObject* __Pyx_patch_abc_module(PyObject *module) { - module = __Pyx_Coroutine_patch_module( - module, "" -"if _cython_generator_type is not None:\n" -" try: Generator = _module.Generator\n" -" except AttributeError: pass\n" -" else: Generator.register(_cython_generator_type)\n" -"if _cython_coroutine_type is not None:\n" -" try: Coroutine = _module.Coroutine\n" -" except AttributeError: pass\n" -" else: Coroutine.register(_cython_coroutine_type)\n" - ); - return module; -} -#endif -static int __Pyx_patch_abc(void) { -#if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) - static int abc_patched = 0; - if (CYTHON_REGISTER_ABCS && !abc_patched) { - PyObject *module; - module = PyImport_ImportModule((PY_MAJOR_VERSION >= 3) ? "collections.abc" : "collections"); - if (unlikely(!module)) { - PyErr_WriteUnraisable(NULL); - if (unlikely(PyErr_WarnEx(PyExc_RuntimeWarning, - ((PY_MAJOR_VERSION >= 3) ? - "Cython module failed to register with collections.abc module" : - "Cython module failed to register with collections module"), 1) < 0)) { - return -1; - } - } else { - module = __Pyx_patch_abc_module(module); - abc_patched = 1; - if (unlikely(!module)) - return -1; - Py_DECREF(module); - } - module = PyImport_ImportModule("backports_abc"); - if (module) { - module = __Pyx_patch_abc_module(module); - Py_XDECREF(module); - } - if (!module) { - PyErr_Clear(); - } - } -#else - if ((0)) __Pyx_Coroutine_patch_module(NULL, NULL); -#endif - return 0; -} - -/* Generator */ - static PyMethodDef __pyx_Generator_methods[] = { - {"send", (PyCFunction) __Pyx_Coroutine_Send, METH_O, - (char*) PyDoc_STR("send(arg) -> send 'arg' into generator,\nreturn next yielded value or raise StopIteration.")}, - {"throw", (PyCFunction) __Pyx_Coroutine_Throw, METH_VARARGS, - (char*) PyDoc_STR("throw(typ[,val[,tb]]) -> raise exception in generator,\nreturn next yielded value or raise StopIteration.")}, - {"close", (PyCFunction) __Pyx_Coroutine_Close_Method, METH_NOARGS, - (char*) PyDoc_STR("close() -> raise GeneratorExit inside generator.")}, - {0, 0, 0, 0} -}; -static PyMemberDef __pyx_Generator_memberlist[] = { - {(char *) "gi_running", T_BOOL, offsetof(__pyx_CoroutineObject, is_running), READONLY, NULL}, - {(char*) "gi_yieldfrom", T_OBJECT, offsetof(__pyx_CoroutineObject, yieldfrom), READONLY, - (char*) PyDoc_STR("object being iterated by 'yield from', or None")}, - {(char*) "gi_code", T_OBJECT, offsetof(__pyx_CoroutineObject, gi_code), READONLY, NULL}, - {(char *) "__module__", T_OBJECT, offsetof(__pyx_CoroutineObject, gi_modulename), 0, 0}, -#if CYTHON_USE_TYPE_SPECS - {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(__pyx_CoroutineObject, gi_weakreflist), READONLY, 0}, -#endif - {0, 0, 0, 0, 0} -}; -static PyGetSetDef __pyx_Generator_getsets[] = { - {(char *) "__name__", (getter)__Pyx_Coroutine_get_name, (setter)__Pyx_Coroutine_set_name, - (char*) PyDoc_STR("name of the generator"), 0}, - {(char *) "__qualname__", (getter)__Pyx_Coroutine_get_qualname, (setter)__Pyx_Coroutine_set_qualname, - (char*) PyDoc_STR("qualified name of the generator"), 0}, - {(char *) "gi_frame", (getter)__Pyx_Coroutine_get_frame, NULL, - (char*) PyDoc_STR("Frame of the generator"), 0}, - {0, 0, 0, 0, 0} -}; -#if CYTHON_USE_TYPE_SPECS -static PyType_Slot __pyx_GeneratorType_slots[] = { - {Py_tp_dealloc, (void *)__Pyx_Coroutine_dealloc}, - {Py_tp_traverse, (void *)__Pyx_Coroutine_traverse}, - {Py_tp_iter, (void *)PyObject_SelfIter}, - {Py_tp_iternext, (void *)__Pyx_Generator_Next}, - {Py_tp_methods, (void *)__pyx_Generator_methods}, - {Py_tp_members, (void *)__pyx_Generator_memberlist}, - {Py_tp_getset, (void *)__pyx_Generator_getsets}, - {Py_tp_getattro, (void *) __Pyx_PyObject_GenericGetAttrNoDict}, -#if CYTHON_USE_TP_FINALIZE - {Py_tp_finalize, (void *)__Pyx_Coroutine_del}, -#endif - {0, 0}, -}; -static PyType_Spec __pyx_GeneratorType_spec = { - __PYX_TYPE_MODULE_PREFIX "generator", - sizeof(__pyx_CoroutineObject), - 0, - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, - __pyx_GeneratorType_slots -}; -#else -static PyTypeObject __pyx_GeneratorType_type = { - PyVarObject_HEAD_INIT(0, 0) - __PYX_TYPE_MODULE_PREFIX "generator", - sizeof(__pyx_CoroutineObject), - 0, - (destructor) __Pyx_Coroutine_dealloc, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, - 0, - (traverseproc) __Pyx_Coroutine_traverse, - 0, - 0, - offsetof(__pyx_CoroutineObject, gi_weakreflist), - 0, - (iternextfunc) __Pyx_Generator_Next, - __pyx_Generator_methods, - __pyx_Generator_memberlist, - __pyx_Generator_getsets, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, -#if CYTHON_USE_TP_FINALIZE - 0, -#else - __Pyx_Coroutine_del, -#endif - 0, -#if CYTHON_USE_TP_FINALIZE - __Pyx_Coroutine_del, -#elif PY_VERSION_HEX >= 0x030400a1 - 0, -#endif -#if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) - 0, -#endif -#if __PYX_NEED_TP_PRINT_SLOT - 0, -#endif -#if PY_VERSION_HEX >= 0x030C0000 - 0, -#endif -#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 - 0, -#endif -}; -#endif -static int __pyx_Generator_init(PyObject *module) { -#if CYTHON_USE_TYPE_SPECS - __pyx_GeneratorType = __Pyx_FetchCommonTypeFromSpec(module, &__pyx_GeneratorType_spec, NULL); -#else - CYTHON_UNUSED_VAR(module); - __pyx_GeneratorType_type.tp_getattro = __Pyx_PyObject_GenericGetAttrNoDict; - __pyx_GeneratorType_type.tp_iter = PyObject_SelfIter; - __pyx_GeneratorType = __Pyx_FetchCommonType(&__pyx_GeneratorType_type); -#endif - if (unlikely(!__pyx_GeneratorType)) { - return -1; - } - return 0; -} - -/* CheckBinaryVersion */ - static int __Pyx_check_binary_version(void) { - char ctversion[5]; - int same=1, i, found_dot; - const char* rt_from_call = Py_GetVersion(); - PyOS_snprintf(ctversion, 5, "%d.%d", PY_MAJOR_VERSION, PY_MINOR_VERSION); - found_dot = 0; - for (i = 0; i < 4; i++) { - if (!ctversion[i]) { - same = (rt_from_call[i] < '0' || rt_from_call[i] > '9'); - break; - } - if (rt_from_call[i] != ctversion[i]) { - same = 0; - break; - } - } - if (!same) { - char rtversion[5] = {'\0'}; - char message[200]; - for (i=0; i<4; ++i) { - if (rt_from_call[i] == '.') { - if (found_dot) break; - found_dot = 1; - } else if (rt_from_call[i] < '0' || rt_from_call[i] > '9') { - break; - } - rtversion[i] = rt_from_call[i]; - } - PyOS_snprintf(message, sizeof(message), - "compile time version %s of module '%.100s' " - "does not match runtime version %s", - ctversion, __Pyx_MODULE_NAME, rtversion); - return PyErr_WarnEx(NULL, message, 1); - } - return 0; -} - -/* InitStrings */ - #if PY_MAJOR_VERSION >= 3 -static int __Pyx_InitString(__Pyx_StringTabEntry t, PyObject **str) { - if (t.is_unicode | t.is_str) { - if (t.intern) { - *str = PyUnicode_InternFromString(t.s); - } else if (t.encoding) { - *str = PyUnicode_Decode(t.s, t.n - 1, t.encoding, NULL); - } else { - *str = PyUnicode_FromStringAndSize(t.s, t.n - 1); - } - } else { - *str = PyBytes_FromStringAndSize(t.s, t.n - 1); - } - if (!*str) - return -1; - if (PyObject_Hash(*str) == -1) - return -1; - return 0; -} -#endif -static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { - while (t->p) { - #if PY_MAJOR_VERSION >= 3 - __Pyx_InitString(*t, t->p); - #else - if (t->is_unicode) { - *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); - } else if (t->intern) { - *t->p = PyString_InternFromString(t->s); - } else { - *t->p = PyString_FromStringAndSize(t->s, t->n - 1); - } - if (!*t->p) - return -1; - if (PyObject_Hash(*t->p) == -1) - return -1; - #endif - ++t; - } - return 0; -} - -static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { - return __Pyx_PyUnicode_FromStringAndSize(c_str, (Py_ssize_t)strlen(c_str)); -} -static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject* o) { - Py_ssize_t ignore; - return __Pyx_PyObject_AsStringAndSize(o, &ignore); -} -#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT -#if !CYTHON_PEP393_ENABLED -static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { - char* defenc_c; - PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); - if (!defenc) return NULL; - defenc_c = PyBytes_AS_STRING(defenc); -#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII - { - char* end = defenc_c + PyBytes_GET_SIZE(defenc); - char* c; - for (c = defenc_c; c < end; c++) { - if ((unsigned char) (*c) >= 128) { - PyUnicode_AsASCIIString(o); - return NULL; - } - } - } -#endif - *length = PyBytes_GET_SIZE(defenc); - return defenc_c; -} -#else -static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { - if (unlikely(__Pyx_PyUnicode_READY(o) == -1)) return NULL; -#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII - if (likely(PyUnicode_IS_ASCII(o))) { - *length = PyUnicode_GET_LENGTH(o); - return PyUnicode_AsUTF8(o); - } else { - PyUnicode_AsASCIIString(o); - return NULL; - } -#else - return PyUnicode_AsUTF8AndSize(o, length); -#endif -} -#endif -#endif -static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { -#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT - if ( -#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII - __Pyx_sys_getdefaultencoding_not_ascii && -#endif - PyUnicode_Check(o)) { - return __Pyx_PyUnicode_AsStringAndSize(o, length); - } else -#endif -#if (!CYTHON_COMPILING_IN_PYPY && !CYTHON_COMPILING_IN_LIMITED_API) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) - if (PyByteArray_Check(o)) { - *length = PyByteArray_GET_SIZE(o); - return PyByteArray_AS_STRING(o); - } else -#endif - { - char* result; - int r = PyBytes_AsStringAndSize(o, &result, length); - if (unlikely(r < 0)) { - return NULL; - } else { - return result; - } - } -} -static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { - int is_true = x == Py_True; - if (is_true | (x == Py_False) | (x == Py_None)) return is_true; - else return PyObject_IsTrue(x); -} -static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject* x) { - int retval; - if (unlikely(!x)) return -1; - retval = __Pyx_PyObject_IsTrue(x); - Py_DECREF(x); - return retval; -} -static PyObject* __Pyx_PyNumber_IntOrLongWrongResultType(PyObject* result, const char* type_name) { - __Pyx_TypeName result_type_name = __Pyx_PyType_GetName(Py_TYPE(result)); -#if PY_MAJOR_VERSION >= 3 - if (PyLong_Check(result)) { - if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "__int__ returned non-int (type " __Pyx_FMT_TYPENAME "). " - "The ability to return an instance of a strict subclass of int is deprecated, " - "and may be removed in a future version of Python.", - result_type_name)) { - __Pyx_DECREF_TypeName(result_type_name); - Py_DECREF(result); - return NULL; - } - __Pyx_DECREF_TypeName(result_type_name); - return result; - } -#endif - PyErr_Format(PyExc_TypeError, - "__%.4s__ returned non-%.4s (type " __Pyx_FMT_TYPENAME ")", - type_name, type_name, result_type_name); - __Pyx_DECREF_TypeName(result_type_name); - Py_DECREF(result); - return NULL; -} -static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) { -#if CYTHON_USE_TYPE_SLOTS - PyNumberMethods *m; -#endif - const char *name = NULL; - PyObject *res = NULL; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x) || PyLong_Check(x))) -#else - if (likely(PyLong_Check(x))) -#endif - return __Pyx_NewRef(x); -#if CYTHON_USE_TYPE_SLOTS - m = Py_TYPE(x)->tp_as_number; - #if PY_MAJOR_VERSION < 3 - if (m && m->nb_int) { - name = "int"; - res = m->nb_int(x); - } - else if (m && m->nb_long) { - name = "long"; - res = m->nb_long(x); - } - #else - if (likely(m && m->nb_int)) { - name = "int"; - res = m->nb_int(x); - } - #endif -#else - if (!PyBytes_CheckExact(x) && !PyUnicode_CheckExact(x)) { - res = PyNumber_Int(x); - } -#endif - if (likely(res)) { -#if PY_MAJOR_VERSION < 3 - if (unlikely(!PyInt_Check(res) && !PyLong_Check(res))) { -#else - if (unlikely(!PyLong_CheckExact(res))) { -#endif - return __Pyx_PyNumber_IntOrLongWrongResultType(res, name); - } - } - else if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_TypeError, - "an integer is required"); - } - return res; -} -static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { - Py_ssize_t ival; - PyObject *x; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_CheckExact(b))) { - if (sizeof(Py_ssize_t) >= sizeof(long)) - return PyInt_AS_LONG(b); - else - return PyInt_AsSsize_t(b); - } -#endif - if (likely(PyLong_CheckExact(b))) { - #if CYTHON_USE_PYLONG_INTERNALS - if (likely(__Pyx_PyLong_IsCompact(b))) { - return __Pyx_PyLong_CompactValue(b); - } else { - const digit* digits = __Pyx_PyLong_Digits(b); - const Py_ssize_t size = __Pyx_PyLong_SignedDigitCount(b); - switch (size) { - case 2: - if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { - return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case -2: - if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { - return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case 3: - if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { - return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case -3: - if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { - return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case 4: - if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { - return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - case -4: - if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { - return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); - } - break; - } - } - #endif - return PyLong_AsSsize_t(b); - } - x = PyNumber_Index(b); - if (!x) return -1; - ival = PyInt_AsSsize_t(x); - Py_DECREF(x); - return ival; -} -static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject* o) { - if (sizeof(Py_hash_t) == sizeof(Py_ssize_t)) { - return (Py_hash_t) __Pyx_PyIndex_AsSsize_t(o); -#if PY_MAJOR_VERSION < 3 - } else if (likely(PyInt_CheckExact(o))) { - return PyInt_AS_LONG(o); -#endif - } else { - Py_ssize_t ival; - PyObject *x; - x = PyNumber_Index(o); - if (!x) return -1; - ival = PyInt_AsLong(x); - Py_DECREF(x); - return ival; - } -} -static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b) { - return b ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False); -} -static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { - return PyInt_FromSize_t(ival); -} - - -/* #### Code section: utility_code_pragmas_end ### */ -#ifdef _MSC_VER -#pragma warning( pop ) -#endif - - - -/* #### Code section: end ### */ -#endif /* Py_PYTHON_H */ diff --git a/simdjson/simdjson.cpp b/simdjson/simdjson.cpp index a683397..d0f441b 100644 --- a/simdjson/simdjson.cpp +++ b/simdjson/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2024-01-28 12:42:59 -0500. Do not edit! */ +/* auto-generated on 2025-03-27 15:01:10 -0400. Do not edit! */ /* including simdjson.cpp: */ /* begin file simdjson.cpp */ #define SIMDJSON_SRC_SIMDJSON_CPP @@ -40,6 +40,16 @@ #endif #endif +// C++ 23 +#if !defined(SIMDJSON_CPLUSPLUS23) && (SIMDJSON_CPLUSPLUS >= 202302L) +#define SIMDJSON_CPLUSPLUS23 1 +#endif + +// C++ 20 +#if !defined(SIMDJSON_CPLUSPLUS20) && (SIMDJSON_CPLUSPLUS >= 202002L) +#define SIMDJSON_CPLUSPLUS20 1 +#endif + // C++ 17 #if !defined(SIMDJSON_CPLUSPLUS17) && (SIMDJSON_CPLUSPLUS >= 201703L) #define SIMDJSON_CPLUSPLUS17 1 @@ -67,6 +77,30 @@ #endif #endif +#ifdef __has_include +#if __has_include() +#include +#endif +#endif + +#if defined(__apple_build_version__) +#if __apple_build_version__ < 14000000 +#define SIMDJSON_CONCEPT_DISABLED 1 // apple-clang/13 doesn't support std::convertible_to +#endif +#endif + + +#if defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) +#if __cpp_concepts >= 201907L +#include +#define SIMDJSON_SUPPORTS_DESERIALIZATION 1 +#else +#define SIMDJSON_SUPPORTS_DESERIALIZATION 0 +#endif +#else // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) +#define SIMDJSON_SUPPORTS_DESERIALIZATION 0 +#endif // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) + #endif // SIMDJSON_COMPILER_CHECK_H /* end file simdjson/compiler_check.h */ /* including simdjson/portability.h: #include "simdjson/portability.h" */ @@ -79,11 +113,18 @@ #include #include #include +#include #ifndef _WIN32 // strcasecmp, strncasecmp #include #endif +static_assert(CHAR_BIT == 8, "simdjson requires 8-bit bytes"); + + +// We are using size_t without namespace std:: throughout the project +using std::size_t; + #ifdef _MSC_VER #define SIMDJSON_VISUAL_STUDIO 1 /** @@ -105,13 +146,16 @@ #endif // __clang__ #endif // _MSC_VER -#if defined(__x86_64__) || defined(_M_AMD64) +#if (defined(__x86_64__) || defined(_M_AMD64)) && !defined(_M_ARM64EC) #define SIMDJSON_IS_X86_64 1 -#elif defined(__aarch64__) || defined(_M_ARM64) +#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) #define SIMDJSON_IS_ARM64 1 #elif defined(__riscv) && __riscv_xlen == 64 #define SIMDJSON_IS_RISCV64 1 +#elif defined(__loongarch_lp64) +#define SIMDJSON_IS_LOONGARCH64 1 #elif defined(__PPC64__) || defined(_M_PPC64) +#define SIMDJSON_IS_PPC64 1 #if defined(__ALTIVEC__) #define SIMDJSON_IS_PPC64_VMX 1 #endif // defined(__ALTIVEC__) @@ -219,6 +263,11 @@ #define SIMDJSON_NO_SANITIZE_UNDEFINED #endif +#if defined(__clang__) || defined(__GNUC__) +#define simdjson_pure [[gnu::pure]] +#else +#define simdjson_pure +#endif #if defined(__clang__) || defined(__GNUC__) #if defined(__has_feature) @@ -264,6 +313,45 @@ #endif + + +#if defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ +#define SIMDJSON_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#elif defined _WIN32 +#define SIMDJSON_IS_BIG_ENDIAN 0 +#else +#if defined(__APPLE__) || defined(__FreeBSD__) +#include +#elif defined(sun) || defined(__sun) +#include +#elif defined(__MVS__) +#include +#else +#ifdef __has_include +#if __has_include() +#include +#endif //__has_include() +#endif //__has_include +#endif +# +#ifndef __BYTE_ORDER__ +// safe choice +#define SIMDJSON_IS_BIG_ENDIAN 0 +#endif +# +#ifndef __ORDER_LITTLE_ENDIAN__ +// safe choice +#define SIMDJSON_IS_BIG_ENDIAN 0 +#endif +# +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define SIMDJSON_IS_BIG_ENDIAN 0 +#else +#define SIMDJSON_IS_BIG_ENDIAN 1 +#endif +#endif + + #endif // SIMDJSON_PORTABILITY_H /* end file simdjson/portability.h */ @@ -312,6 +400,8 @@ double from_chars(const char *first, const char* end) noexcept; #define SIMDJSON_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0) #if SIMDJSON_REGULAR_VISUAL_STUDIO + // We could use [[deprecated]] but it requires C++14 + #define simdjson_deprecated __declspec(deprecated) #define simdjson_really_inline __forceinline #define simdjson_never_inline __declspec(noinline) @@ -350,6 +440,8 @@ double from_chars(const char *first, const char* end) noexcept; #define SIMDJSON_POP_DISABLE_UNUSED_WARNINGS #else // SIMDJSON_REGULAR_VISUAL_STUDIO + // We could use [[deprecated]] but it requires C++14 + #define simdjson_deprecated __attribute__((deprecated)) #define simdjson_really_inline inline __attribute__((always_inline)) #define simdjson_never_inline inline __attribute__((noinline)) @@ -525,13 +617,12 @@ SIMDJSON_PUSH_DISABLE_ALL_WARNINGS // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -// #pragma once // We remove #pragma once here as it generates a warning in some cases. We rely on the include guard. #ifndef NONSTD_SV_LITE_H_INCLUDED #define NONSTD_SV_LITE_H_INCLUDED #define string_view_lite_MAJOR 1 -#define string_view_lite_MINOR 7 +#define string_view_lite_MINOR 8 #define string_view_lite_PATCH 0 #define string_view_lite_VERSION nssv_STRINGIFY(string_view_lite_MAJOR) "." nssv_STRINGIFY(string_view_lite_MINOR) "." nssv_STRINGIFY(string_view_lite_PATCH) @@ -653,6 +744,8 @@ SIMDJSON_PUSH_DISABLE_ALL_WARNINGS #if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS +#include + namespace nonstd { template< class CharT, class Traits, class Allocator = std::allocator > @@ -683,22 +776,22 @@ inline namespace literals { inline namespace string_view_literals { -constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1) +constexpr std::string_view operator ""_sv( const char* str, size_t len ) noexcept // (1) { return std::string_view{ str, len }; } -constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2) +constexpr std::u16string_view operator ""_sv( const char16_t* str, size_t len ) noexcept // (2) { return std::u16string_view{ str, len }; } -constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3) +constexpr std::u32string_view operator ""_sv( const char32_t* str, size_t len ) noexcept // (3) { return std::u32string_view{ str, len }; } -constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4) +constexpr std::wstring_view operator ""_sv( const wchar_t* str, size_t len ) noexcept // (4) { return std::wstring_view{ str, len }; } @@ -2029,22 +2122,22 @@ nssv_inline_ns namespace string_view_literals { #if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS -nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1) +nssv_constexpr nonstd::sv_lite::string_view operator ""sv( const char* str, size_t len ) nssv_noexcept // (1) { return nonstd::sv_lite::string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +nssv_constexpr nonstd::sv_lite::u16string_view operator ""sv( const char16_t* str, size_t len ) nssv_noexcept // (2) { return nonstd::sv_lite::u16string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +nssv_constexpr nonstd::sv_lite::u32string_view operator ""sv( const char32_t* str, size_t len ) nssv_noexcept // (3) { return nonstd::sv_lite::u32string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +nssv_constexpr nonstd::sv_lite::wstring_view operator ""sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) { return nonstd::sv_lite::wstring_view{ str, len }; } @@ -2053,22 +2146,22 @@ nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, #if nssv_CONFIG_USR_SV_OPERATOR -nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1) +nssv_constexpr nonstd::sv_lite::string_view operator ""_sv( const char* str, size_t len ) nssv_noexcept // (1) { return nonstd::sv_lite::string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +nssv_constexpr nonstd::sv_lite::u16string_view operator ""_sv( const char16_t* str, size_t len ) nssv_noexcept // (2) { return nonstd::sv_lite::u16string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +nssv_constexpr nonstd::sv_lite::u32string_view operator ""_sv( const char32_t* str, size_t len ) nssv_noexcept // (3) { return nonstd::sv_lite::u32string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +nssv_constexpr nonstd::sv_lite::wstring_view operator ""_sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) { return nonstd::sv_lite::wstring_view{ str, len }; } @@ -2338,13 +2431,14 @@ enum error_code { SUCCESS = 0, ///< No error CAPACITY, ///< This parser can't support a document that big MEMALLOC, ///< Error allocating memory, most likely out of memory - TAPE_ERROR, ///< Something went wrong, this is a generic error + TAPE_ERROR, ///< Something went wrong, this is a generic error. Fatal/unrecoverable error. DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation STRING_ERROR, ///< Problem while parsing a string T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' F_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'f' N_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'n' NUMBER_ERROR, ///< Problem while parsing a number + BIGINT_ERROR, ///< The integer value exceeds 64 bits UTF8_ERROR, ///< the input is not valid UTF-8 UNINITIALIZED, ///< unknown error, or uninitialized document EMPTY, ///< no structural element found @@ -2356,19 +2450,27 @@ enum error_code { INDEX_OUT_OF_BOUNDS, ///< JSON array index too large NO_SUCH_FIELD, ///< JSON field not found in object IO_ERROR, ///< Error reading a file - INVALID_JSON_POINTER, ///< Invalid JSON pointer reference + INVALID_JSON_POINTER, ///< Invalid JSON pointer syntax INVALID_URI_FRAGMENT, ///< Invalid URI fragment UNEXPECTED_ERROR, ///< indicative of a bug in simdjson PARSER_IN_USE, ///< parser is already in use. OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order (checked when SIMDJSON_DEVELOPMENT_CHECKS=1) INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. - INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. + INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. Fatal/unrecoverable error. SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. OUT_OF_BOUNDS, ///< Attempted to access location outside of document. TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input NUM_ERROR_CODES }; +/** + * Some errors are fatal and invalidate the document. This function returns true if the + * error is fatal. It returns true for TAPE_ERROR and INCOMPLETE_ARRAY_OR_OBJECT. + * Once a fatal error is encountered, the on-demand document is no longer valid and + * processing should stop. + */ + inline bool is_fatal(error_code error) noexcept; + /** * It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whether * we check for OUT_OF_ORDER_ITERATION. The logic behind it is that these errors only occurs when the code @@ -2403,7 +2505,7 @@ struct simdjson_error : public std::exception { */ simdjson_error(error_code error) noexcept : _error{error} { } /** The error message */ - const char *what() const noexcept { return error_message(error()); } + const char *what() const noexcept override { return error_message(error()); } /** The error code */ error_code error() const noexcept { return _error; } private: @@ -2635,6 +2737,140 @@ inline const std::string error_message(int error) noexcept; #endif // SIMDJSON_ERROR_H /* end file simdjson/error.h */ /* skipped duplicate #include "simdjson/portability.h" */ +/* including simdjson/concepts.h: #include "simdjson/concepts.h" */ +/* begin file simdjson/concepts.h */ +#ifndef SIMDJSON_CONCEPTS_H +#define SIMDJSON_CONCEPTS_H +#if SIMDJSON_SUPPORTS_DESERIALIZATION + +#include +#include + +namespace simdjson { +namespace concepts { + +namespace details { +#define SIMDJSON_IMPL_CONCEPT(name, method) \ + template \ + concept supports_##name = !std::is_const_v && requires { \ + typename std::remove_cvref_t::value_type; \ + requires requires(typename std::remove_cvref_t::value_type &&val, \ + T obj) { \ + obj.method(std::move(val)); \ + requires !requires { obj = std::move(val); }; \ + }; \ + }; + +SIMDJSON_IMPL_CONCEPT(emplace_back, emplace_back) +SIMDJSON_IMPL_CONCEPT(emplace, emplace) +SIMDJSON_IMPL_CONCEPT(push_back, push_back) +SIMDJSON_IMPL_CONCEPT(add, add) +SIMDJSON_IMPL_CONCEPT(push, push) +SIMDJSON_IMPL_CONCEPT(append, append) +SIMDJSON_IMPL_CONCEPT(insert, insert) +SIMDJSON_IMPL_CONCEPT(op_append, operator+=) + +#undef SIMDJSON_IMPL_CONCEPT +} // namespace details + + +template +concept string_view_like = std::is_convertible_v && + !std::is_convertible_v; + +template +concept constructible_from_string_view = std::is_constructible_v + && !std::is_same_v + && std::is_default_constructible_v; + +template +concept string_view_keyed_map = string_view_like + && requires(std::remove_cvref_t& m, typename M::key_type sv, typename M::mapped_type v) { + { m.emplace(sv, v) } -> std::same_as>; +}; + +/// Check if T is a container that we can append to, including: +/// std::vector, std::deque, std::list, std::string, ... +template +concept appendable_containers = + (details::supports_emplace_back || details::supports_emplace || + details::supports_push_back || details::supports_push || + details::supports_add || details::supports_append || + details::supports_insert) && !string_view_keyed_map; + +/// Insert into the container however possible +template +constexpr decltype(auto) emplace_one(T &vec, Args &&...args) { + if constexpr (details::supports_emplace_back) { + return vec.emplace_back(std::forward(args)...); + } else if constexpr (details::supports_emplace) { + return vec.emplace(std::forward(args)...); + } else if constexpr (details::supports_push_back) { + return vec.push_back(std::forward(args)...); + } else if constexpr (details::supports_push) { + return vec.push(std::forward(args)...); + } else if constexpr (details::supports_add) { + return vec.add(std::forward(args)...); + } else if constexpr (details::supports_append) { + return vec.append(std::forward(args)...); + } else if constexpr (details::supports_insert) { + return vec.insert(std::forward(args)...); + } else if constexpr (details::supports_op_append && sizeof...(Args) == 1) { + return vec.operator+=(std::forward(args)...); + } else { + static_assert(!sizeof(T *), + "We don't know how to add things to this container"); + } +} + +/// This checks if the container will return a reference to the newly added +/// element after an insert which for example `std::vector::emplace_back` does +/// since C++17; this will allow some optimizations. +template +concept returns_reference = appendable_containers && requires { + typename std::remove_cvref_t::reference; + requires requires(typename std::remove_cvref_t::value_type &&val, T obj) { + { + emplace_one(obj, std::move(val)) + } -> std::same_as::reference>; + }; +}; + +template +concept smart_pointer = requires(std::remove_cvref_t ptr) { + // Check if T has a member type named element_type + typename std::remove_cvref_t::element_type; + + // Check if T has a get() member function + { + ptr.get() + } -> std::same_as::element_type *>; + + // Check if T can be dereferenced + { *ptr } -> std::same_as::element_type &>; +}; + +template +concept optional_type = requires(std::remove_cvref_t obj) { + typename std::remove_cvref_t::value_type; + { obj.value() } -> std::same_as::value_type&>; + requires requires(typename std::remove_cvref_t::value_type &&val) { + obj.emplace(std::move(val)); + obj = std::move(val); + { + obj.value_or(val) + } -> std::convertible_to::value_type>; + }; + { static_cast(obj) } -> std::same_as; // convertible to bool +}; + + + +} // namespace concepts +} // namespace simdjson +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_CONCEPTS_H +/* end file simdjson/concepts.h */ /** * @brief The top level simdjson namespace, containing everything the library provides. @@ -4301,6 +4537,11 @@ extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886]; #include namespace simdjson { + +inline bool is_fatal(error_code error) noexcept { + return error == TAPE_ERROR || error == INCOMPLETE_ARRAY_OR_OBJECT; +} + namespace internal { // We store the error code so we can validate the error message is associated with the right code struct error_code_info { @@ -4486,13 +4727,14 @@ namespace internal { { SUCCESS, "SUCCESS: No error" }, { CAPACITY, "CAPACITY: This parser can't support a document that big" }, { MEMALLOC, "MEMALLOC: Error allocating memory, we're most likely out of memory" }, - { TAPE_ERROR, "TAPE_ERROR: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc." }, + { TAPE_ERROR, "TAPE_ERROR: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc. This is a fatal and unrecoverable error." }, { DEPTH_ERROR, "DEPTH_ERROR: The JSON document was too deep (too many nested objects and arrays)" }, { STRING_ERROR, "STRING_ERROR: Problem while parsing a string" }, { T_ATOM_ERROR, "T_ATOM_ERROR: Problem while parsing an atom starting with the letter 't'" }, { F_ATOM_ERROR, "F_ATOM_ERROR: Problem while parsing an atom starting with the letter 'f'" }, { N_ATOM_ERROR, "N_ATOM_ERROR: Problem while parsing an atom starting with the letter 'n'" }, { NUMBER_ERROR, "NUMBER_ERROR: Problem while parsing a number" }, + { BIGINT_ERROR, "BIGINT_ERROR: Big integer value that cannot be represented using 64 bits" }, { UTF8_ERROR, "UTF8_ERROR: The input is not valid UTF-8" }, { UNINITIALIZED, "UNINITIALIZED: Uninitialized" }, { EMPTY, "EMPTY: no JSON found" }, @@ -4510,7 +4752,7 @@ namespace internal { { PARSER_IN_USE, "PARSER_IN_USE: Cannot parse a new document while a document is still in use." }, { OUT_OF_ORDER_ITERATION, "OUT_OF_ORDER_ITERATION: Objects and arrays can only be iterated when they are first encountered." }, { INSUFFICIENT_PADDING, "INSUFFICIENT_PADDING: simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length. Consider using the simdjson::padded_string class if needed." }, - { INCOMPLETE_ARRAY_OR_OBJECT, "INCOMPLETE_ARRAY_OR_OBJECT: JSON document ended early in the middle of an object or array." }, + { INCOMPLETE_ARRAY_OR_OBJECT, "INCOMPLETE_ARRAY_OR_OBJECT: JSON document ended early in the middle of an object or array. This is a fatal and unrecoverable error." }, { SCALAR_DOCUMENT_AS_VALUE, "SCALAR_DOCUMENT_AS_VALUE: A JSON document made of a scalar (number, Boolean, null or string) is treated as a value. Use get_bool(), get_double(), etc. on the document instead. "}, { OUT_OF_BOUNDS, "OUT_OF_BOUNDS: Attempt to access location outside of document."}, { TRAILING_CONTENT, "TRAILING_CONTENT: Unexpected trailing content in the JSON input."} @@ -5485,6 +5727,8 @@ SIMDJSON_DLLIMPORTEXPORT const uint64_t simdjson::internal::power_of_five_128[]= #define SIMDJSON_IMPLEMENTATION_ID_icelake 4 #define SIMDJSON_IMPLEMENTATION_ID_ppc64 5 #define SIMDJSON_IMPLEMENTATION_ID_westmere 6 +#define SIMDJSON_IMPLEMENTATION_ID_lsx 7 +#define SIMDJSON_IMPLEMENTATION_ID_lasx 8 #define SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) SIMDJSON_CAT(SIMDJSON_IMPLEMENTATION_ID_, IMPL) #define SIMDJSON_IMPLEMENTATION_ID SIMDJSON_IMPLEMENTATION_ID_FOR(SIMDJSON_IMPLEMENTATION) @@ -5499,7 +5743,11 @@ SIMDJSON_DLLIMPORTEXPORT const uint64_t simdjson::internal::power_of_five_128[]= #ifndef SIMDJSON_IMPLEMENTATION_ARM64 #define SIMDJSON_IMPLEMENTATION_ARM64 (SIMDJSON_IS_ARM64) #endif -#define SIMDJSON_CAN_ALWAYS_RUN_ARM64 SIMDJSON_IMPLEMENTATION_ARM64 && SIMDJSON_IS_ARM64 +#if SIMDJSON_IMPLEMENTATION_ARM64 && SIMDJSON_IS_ARM64 +#define SIMDJSON_CAN_ALWAYS_RUN_ARM64 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_ARM64 0 +#endif // Default Icelake to on if this is x86-64. Even if we're not compiled for it, it could be selected // at runtime. @@ -5510,9 +5758,20 @@ SIMDJSON_DLLIMPORTEXPORT const uint64_t simdjson::internal::power_of_five_128[]= #ifdef _MSC_VER // To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see // https://github.com/simdjson/simdjson/issues/1247 -#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#if ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 0 +#endif + +#else + +#if ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 1 #else -#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 0 +#endif + #endif // Default Haswell to on if this is x86-64. Even if we're not compiled for it, it could be selected @@ -5528,9 +5787,20 @@ SIMDJSON_DLLIMPORTEXPORT const uint64_t simdjson::internal::power_of_five_128[]= #ifdef _MSC_VER // To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see // https://github.com/simdjson/simdjson/issues/1247 -#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__)) +#if ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__)) +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 0 +#endif + +#else + +#if ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__)) +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 1 #else -#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__)) +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 0 +#endif + #endif // Default Westmere to on if this is x86-64. @@ -5542,16 +5812,40 @@ SIMDJSON_DLLIMPORTEXPORT const uint64_t simdjson::internal::power_of_five_128[]= #define SIMDJSON_IMPLEMENTATION_WESTMERE SIMDJSON_IS_X86_64 #endif #endif -#define SIMDJSON_CAN_ALWAYS_RUN_WESTMERE (SIMDJSON_IMPLEMENTATION_WESTMERE && SIMDJSON_IS_X86_64 && __SSE4_2__ && __PCLMUL__) + +#if (SIMDJSON_IMPLEMENTATION_WESTMERE && SIMDJSON_IS_X86_64 && __SSE4_2__ && __PCLMUL__) +#define SIMDJSON_CAN_ALWAYS_RUN_WESTMERE 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_WESTMERE 0 +#endif + #ifndef SIMDJSON_IMPLEMENTATION_PPC64 #define SIMDJSON_IMPLEMENTATION_PPC64 (SIMDJSON_IS_PPC64 && SIMDJSON_IS_PPC64_VMX) #endif -#define SIMDJSON_CAN_ALWAYS_RUN_PPC64 SIMDJSON_IMPLEMENTATION_PPC64 && SIMDJSON_IS_PPC64 && SIMDJSON_IS_PPC64_VMX +#if SIMDJSON_IMPLEMENTATION_PPC64 && SIMDJSON_IS_PPC64 && SIMDJSON_IS_PPC64_VMX +#define SIMDJSON_CAN_ALWAYS_RUN_PPC64 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_PPC64 0 +#endif + +#ifndef SIMDJSON_IMPLEMENTATION_LASX +#define SIMDJSON_IMPLEMENTATION_LASX (SIMDJSON_IS_LOONGARCH64 && __loongarch_asx) +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_LASX (SIMDJSON_IMPLEMENTATION_LASX) + +#ifndef SIMDJSON_IMPLEMENTATION_LSX +#if SIMDJSON_CAN_ALWAYS_RUN_LASX +#define SIMDJSON_IMPLEMENTATION_LSX 0 +#else +#define SIMDJSON_IMPLEMENTATION_LSX (SIMDJSON_IS_LOONGARCH64 && __loongarch_sx) +#endif +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_LSX (SIMDJSON_IMPLEMENTATION_LSX) // Default Fallback to on unless a builtin implementation has already been selected. #ifndef SIMDJSON_IMPLEMENTATION_FALLBACK -#if SIMDJSON_CAN_ALWAYS_RUN_ARM64 || SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL || SIMDJSON_CAN_ALWAYS_RUN_WESTMERE || SIMDJSON_CAN_ALWAYS_RUN_PPC64 +#if SIMDJSON_CAN_ALWAYS_RUN_ARM64 || SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL || SIMDJSON_CAN_ALWAYS_RUN_WESTMERE || SIMDJSON_CAN_ALWAYS_RUN_PPC64 || SIMDJSON_CAN_ALWAYS_RUN_LSX || SIMDJSON_CAN_ALWAYS_RUN_LASX // if anything at all except fallback can always run, then disable fallback. #define SIMDJSON_IMPLEMENTATION_FALLBACK 0 #else @@ -5573,6 +5867,10 @@ SIMDJSON_DLLIMPORTEXPORT const uint64_t simdjson::internal::power_of_five_128[]= #define SIMDJSON_BUILTIN_IMPLEMENTATION arm64 #elif SIMDJSON_CAN_ALWAYS_RUN_PPC64 #define SIMDJSON_BUILTIN_IMPLEMENTATION ppc64 +#elif SIMDJSON_CAN_ALWAYS_RUN_LSX +#define SIMDJSON_BUILTIN_IMPLEMENTATION lsx +#elif SIMDJSON_CAN_ALWAYS_RUN_LASX +#define SIMDJSON_BUILTIN_IMPLEMENTATION lasx #elif SIMDJSON_CAN_ALWAYS_RUN_FALLBACK #define SIMDJSON_BUILTIN_IMPLEMENTATION fallback #else @@ -5587,7 +5885,7 @@ SIMDJSON_DLLIMPORTEXPORT const uint64_t simdjson::internal::power_of_five_128[]= #endif // SIMDJSON_IMPLEMENTATION_DETECTION_H /* end file simdjson/implementation_detection.h */ -#if SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_ICELAKE || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE || SIMDJSON_IMPLEMENTATION_PPC64 +#if SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_ICELAKE || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE || SIMDJSON_IMPLEMENTATION_PPC64 || SIMDJSON_IMPLEMENTATION_LSX || SIMDJSON_IMPLEMENTATION_LASX #include @@ -5717,7 +6015,7 @@ SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256] = { } // namespace internal } // namespace simdjson -#endif // SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_ICELAKE || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE || SIMDJSON_IMPLEMENTATION_PPC64 +#endif // SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_ICELAKE || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE || SIMDJSON_IMPLEMENTATION_PPC64 || SIMDJSON_IMPLEMENTATION_LSX || SIMDJSON_IMPLEMENTATION_LASX #endif // SIMDJSON_SRC_SIMDPRUNE_TABLES_CPP /* end file internal/simdprune_tables.cpp */ @@ -5884,7 +6182,7 @@ class dom_parser_implementation { * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. * @@ -5901,7 +6199,7 @@ class dom_parser_implementation { * Unescape a NON-valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. * @@ -5955,14 +6253,14 @@ class dom_parser_implementation { * * @return Current capacity, in bytes. */ - simdjson_inline size_t capacity() const noexcept; + simdjson_pure simdjson_inline size_t capacity() const noexcept; /** * The maximum level of nested object and arrays supported by this parser. * * @return Maximum depth, in bytes. */ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -6003,11 +6301,11 @@ simdjson_inline dom_parser_implementation::dom_parser_implementation() noexcept simdjson_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; simdjson_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; -simdjson_inline size_t dom_parser_implementation::capacity() const noexcept { +simdjson_pure simdjson_inline size_t dom_parser_implementation::capacity() const noexcept { return _capacity; } -simdjson_inline size_t dom_parser_implementation::max_depth() const noexcept { +simdjson_pure simdjson_inline size_t dom_parser_implementation::max_depth() const noexcept { return _max_depth; } @@ -6079,7 +6377,7 @@ class implementation { * * @return the name of the implementation, e.g. "haswell", "westmere", "arm64". */ - virtual const std::string &name() const { return _name; } + virtual std::string name() const { return std::string(_name); } /** * The description of this implementation. @@ -6089,7 +6387,7 @@ class implementation { * * @return the description of the implementation, e.g. "Intel/AMD AVX2", "Intel/AMD SSE4.2", "ARM NEON". */ - virtual const std::string &description() const { return _description; } + virtual std::string description() const { return std::string(_description); } /** * The instruction sets this implementation is compiled against @@ -6165,18 +6463,19 @@ class implementation { _required_instruction_sets(required_instruction_sets) { } - virtual ~implementation()=default; +protected: + ~implementation() = default; private: /** * The name of this implementation. */ - const std::string _name; + std::string_view _name; /** * The description of this implementation. */ - const std::string _description; + std::string_view _description; /** * Instruction sets required for this implementation. @@ -6325,7 +6624,9 @@ enum instruction_set { AVX512CD = 0x2000, AVX512BW = 0x4000, AVX512VL = 0x8000, - AVX512VBMI2 = 0x10000 + AVX512VBMI2 = 0x10000, + LSX = 0x20000, + LASX = 0x40000, }; } // namespace internal @@ -6360,7 +6661,6 @@ extern SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256]; #endif // SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H /* end file simdjson/internal/simdprune_tables.h */ - #endif // SIMDJSON_GENERIC_DEPENDENCIES_H /* end file simdjson/generic/dependencies.h */ /* including generic/dependencies.h: #include */ @@ -6518,7 +6818,7 @@ class document { * The memory allocation is strict: you * can you use this function to increase * or lower the amount of allocated memory. - * Passsing zero clears the memory. + * Passing zero clears the memory. */ error_code allocate(size_t len) noexcept; /** @private Capacity in bytes, in terms @@ -6650,7 +6950,7 @@ static inline uint32_t detect_supported_architectures() { return instruction_set::ALTIVEC; } -#elif defined(__aarch64__) || defined(_M_ARM64) +#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) static inline uint32_t detect_supported_architectures() { return instruction_set::NEON; @@ -6803,6 +7103,19 @@ static inline uint32_t detect_supported_architectures() { return host_isa; } + +#elif defined(__loongarch_sx) && !defined(__loongarch_asx) + +static inline uint32_t detect_supported_architectures() { + return instruction_set::LSX; +} + +#elif defined(__loongarch_asx) + +static inline uint32_t detect_supported_architectures() { + return instruction_set::LASX; +} + #else // fallback @@ -6820,6 +7133,7 @@ static inline uint32_t detect_supported_architectures() { /* end file internal/isadetection.h */ #include +#include namespace simdjson { @@ -7128,12 +7442,141 @@ static const simdjson::westmere::implementation* get_westmere_singleton() { } // namespace simdjson #endif // SIMDJSON_IMPLEMENTATION_WESTMERE +#if SIMDJSON_IMPLEMENTATION_LSX +/* including simdjson/lsx/implementation.h: #include */ +/* begin file simdjson/lsx/implementation.h */ +#ifndef SIMDJSON_LSX_IMPLEMENTATION_H +#define SIMDJSON_LSX_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("lsx", "LoongArch SX", internal::instruction_set::LSX) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_IMPLEMENTATION_H +/* end file simdjson/lsx/implementation.h */ +namespace simdjson { +namespace internal { +static const simdjson::lsx::implementation* get_lsx_singleton() { + static const simdjson::lsx::implementation lsx_singleton{}; + return &lsx_singleton; +} +} // namespace internal +} // namespace simdjson +#endif // SIMDJSON_IMPLEMENTATION_LSX + +#if SIMDJSON_IMPLEMENTATION_LASX +/* including simdjson/lasx/implementation.h: #include */ +/* begin file simdjson/lasx/implementation.h */ +#ifndef SIMDJSON_LASX_IMPLEMENTATION_H +#define SIMDJSON_LASX_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("lasx", "LoongArch ASX", internal::instruction_set::LASX) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_IMPLEMENTATION_H +/* end file simdjson/lasx/implementation.h */ +namespace simdjson { +namespace internal { +static const simdjson::lasx::implementation* get_lasx_singleton() { + static const simdjson::lasx::implementation lasx_singleton{}; + return &lasx_singleton; +} +} // namespace internal +} // namespace simdjson +#endif // SIMDJSON_IMPLEMENTATION_LASX + /* undefining SIMDJSON_CONDITIONAL_INCLUDE */ #undef SIMDJSON_CONDITIONAL_INCLUDE namespace simdjson { namespace internal { +// When there is a single implementation, we should not pay a price +// for dispatching to the best implementation. We should just use the +// one we have. This is a compile-time check. +#define SIMDJSON_SINGLE_IMPLEMENTATION (SIMDJSON_IMPLEMENTATION_ICELAKE \ + + SIMDJSON_IMPLEMENTATION_HASWELL + SIMDJSON_IMPLEMENTATION_WESTMERE \ + + SIMDJSON_IMPLEMENTATION_ARM64 + SIMDJSON_IMPLEMENTATION_PPC64 \ + + SIMDJSON_IMPLEMENTATION_LSX + SIMDJSON_IMPLEMENTATION_LASX \ + + SIMDJSON_IMPLEMENTATION_FALLBACK == 1) + +#if SIMDJSON_SINGLE_IMPLEMENTATION + static const implementation* get_single_implementation() { + return +#if SIMDJSON_IMPLEMENTATION_ICELAKE + get_icelake_singleton(); +#endif +#if SIMDJSON_IMPLEMENTATION_HASWELL + get_haswell_singleton(); +#endif +#if SIMDJSON_IMPLEMENTATION_WESTMERE + get_westmere_singleton(); +#endif +#if SIMDJSON_IMPLEMENTATION_ARM64 + get_arm64_singleton(); +#endif +#if SIMDJSON_IMPLEMENTATION_PPC64 + get_ppc64_singleton(); +#endif +#if SIMDJSON_IMPLEMENTATION_LSX + get_lsx_singleton(); +#endif +#if SIMDJSON_IMPLEMENTATION_LASX + get_lasx_singleton(); +#endif +#if SIMDJSON_IMPLEMENTATION_FALLBACK + get_fallback_singleton(); +#endif +} +#endif + // Static array of known implementations. We're hoping these get baked into the executable // without requiring a static initializer. @@ -7142,8 +7585,8 @@ namespace internal { */ class detect_best_supported_implementation_on_first_use final : public implementation { public: - const std::string &name() const noexcept final { return set_best()->name(); } - const std::string &description() const noexcept final { return set_best()->description(); } + std::string name() const noexcept final { return set_best()->name(); } + std::string description() const noexcept final { return set_best()->description(); } uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); } simdjson_warn_unused error_code create_dom_parser_implementation( size_t capacity, @@ -7163,6 +7606,8 @@ class detect_best_supported_implementation_on_first_use final : public implement const implementation *set_best() const noexcept; }; +static_assert(std::is_trivially_destructible::value, "detect_best_supported_implementation_on_first_use should be trivially destructible"); + static const std::initializer_list& get_available_implementation_pointers() { static const std::initializer_list available_implementation_pointers { #if SIMDJSON_IMPLEMENTATION_ICELAKE @@ -7180,6 +7625,12 @@ static const std::initializer_list& get_available_implem #if SIMDJSON_IMPLEMENTATION_PPC64 get_ppc64_singleton(), #endif +#if SIMDJSON_IMPLEMENTATION_LSX + get_lsx_singleton(), +#endif +#if SIMDJSON_IMPLEMENTATION_LASX + get_lasx_singleton(), +#endif #if SIMDJSON_IMPLEMENTATION_FALLBACK get_fallback_singleton(), #endif @@ -7213,6 +7664,8 @@ class unsupported_implementation final : public implementation { unsupported_implementation() : implementation("unsupported", "Unsupported CPU (no detected SIMD instructions)", 0) {} }; +static_assert(std::is_trivially_destructible::value, "unsupported_singleton should be trivially destructible"); + const unsupported_implementation* get_unsupported_singleton() { static const unsupported_implementation unsupported_singleton{}; return &unsupported_singleton; @@ -7263,9 +7716,16 @@ SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list& get_avai } SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr& get_active_implementation() { - static const internal::detect_best_supported_implementation_on_first_use detect_best_supported_implementation_on_first_use_singleton; - static internal::atomic_ptr active_implementation{&detect_best_supported_implementation_on_first_use_singleton}; - return active_implementation; +#if SIMDJSON_SINGLE_IMPLEMENTATION + // We immediately select the only implementation we have, skipping the + // detect_best_supported_implementation_on_first_use_singleton. + static internal::atomic_ptr active_implementation{internal::get_single_implementation()}; + return active_implementation; +#else + static const internal::detect_best_supported_implementation_on_first_use detect_best_supported_implementation_on_first_use_singleton; + static internal::atomic_ptr active_implementation{&detect_best_supported_implementation_on_first_use_singleton}; + return active_implementation; +#endif } simdjson_warn_unused error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept { @@ -7376,7 +7836,7 @@ SIMDJSON_NO_SANITIZE_UNDEFINED // See issue https://github.com/simdjson/simdjson/issues/1965 SIMDJSON_NO_SANITIZE_MEMORY simdjson_inline int trailing_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long ret; // Search the mask data from least significant bit (LSB) // to the most significant bit (MSB) for a set bit (1). @@ -7392,9 +7852,15 @@ simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return input_num & (input_num-1); } +// We sometimes call leading_zeroes on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +// Applies only when SIMDJSON_PREFER_REVERSE_BITS is defined and true. +// (See below.) +SIMDJSON_NO_SANITIZE_UNDEFINED /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long leading_zero = 0; // Search the mask data from most significant bit (MSB) // to least significant bit (LSB) for a set bit (1). @@ -7447,7 +7913,7 @@ simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) #endif simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO *result = value1 + value2; return *result < value1; #else @@ -7522,10 +7988,10 @@ simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { #include -#if _M_ARM64 +#if SIMDJSON_REGULAR_VISUAL_STUDIO && SIMDJSON_IS_ARM64 // __umulh requires intrin.h #include -#endif // _M_ARM64 +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO && SIMDJSON_IS_ARM64 namespace simdjson { namespace arm64 { @@ -7545,13 +8011,13 @@ static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { internal::value128 answer; #if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 +#if SIMDJSON_IS_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); answer.low = value1 * value2; #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 +#endif // SIMDJSON_IS_ARM64 #else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); @@ -7564,7 +8030,13 @@ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t } // namespace arm64 } // namespace simdjson +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 +#else #define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif +#endif #endif // SIMDJSON_ARM64_NUMBERPARSING_DEFS_H /* end file simdjson/arm64/numberparsing_defs.h */ @@ -7584,7 +8056,7 @@ namespace arm64 { namespace { namespace simd { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO namespace { // Start of private section with Visual Studio workaround @@ -7693,7 +8165,7 @@ namespace { // We return uint32_t instead of uint16_t because that seems to be more efficient for most // purposes (cutting it down to uint16_t costs performance in some compilers). simdjson_inline uint32_t to_bitmask() const { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO const uint8x16_t bit_mask = simdjson_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); #else @@ -7706,7 +8178,7 @@ namespace { tmp = vpaddq_u8(tmp, tmp); return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); } - simdjson_inline bool any() const { return vmaxvq_u8(*this) != 0; } + simdjson_inline bool any() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } }; // Unsigned bytes @@ -7724,7 +8196,7 @@ namespace { // Splat constructor simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Member-by-member initialization -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 @@ -7818,7 +8290,7 @@ namespace { uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); // we increment by 0x08 the second half of the mask -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO uint8x16_t inc = simdjson_make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); #else uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; @@ -7848,7 +8320,7 @@ namespace { uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); // we increment by 0x08 the second half of the mask -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO uint8x8_t inc = simdjson_make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); #else uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; @@ -7900,7 +8372,7 @@ namespace { // Array constructor simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} // Member-by-member initialization -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 @@ -8021,7 +8493,7 @@ namespace { } simdjson_inline uint64_t to_bitmask() const { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO const uint8x16_t bit_mask = simdjson_make_uint8x16_t( 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 @@ -8154,6 +8626,10 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin /* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ /* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ /* amalgamation skipped (editor-only): #else */ @@ -8174,7 +8650,8 @@ class dom_parser_implementation; enum class number_type { floating_point_number=1, /// a binary64 number signed_integer, /// a signed integer that fits in a 64-bit word using two's complement - unsigned_integer /// a positive integer larger or equal to 1<<63 + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word }; } // namespace arm64 @@ -8622,11 +9099,13 @@ namespace numberparsing { #define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) #else #define INVALID_NUMBER(SRC) (NUMBER_ERROR) #define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) #endif namespace { @@ -8737,7 +9216,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -8957,6 +9436,10 @@ simdjson_inline bool parse_digit(const uint8_t c, I &i) { return true; } +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer @@ -9014,7 +9497,7 @@ simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const s // If there were more than 18 digits, we may have overflowed the integer. We have to do // something!!!! if (simdjson_unlikely(p > start_exp+18)) { - // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow while (*start_exp == '0') { start_exp++; } // 19 digits could overflow int64_t and is kind of absurd anyway. We don't // support exponents smaller than -999,999,999,999,999,999 and bigger @@ -9036,6 +9519,23 @@ simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const s return SUCCESS; } +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. @@ -9110,6 +9610,18 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, return SUCCESS; } +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING @@ -9141,7 +9653,6 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // Our objective is accurate parsing (ULP of 0) at high speed. template simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { - // // Check for minus sign // @@ -9188,11 +9699,11 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. size_t longest_digit_count = negative ? 19 : 20; - if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } if (digit_count == longest_digit_count) { if (negative) { // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } WRITE_INTEGER(~i+1, src, writer); if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -9211,7 +9722,7 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } } - // Write unsigned if it doesn't fit in a signed integer. + // Write unsigned if it does not fit in a signed integer. if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { @@ -9662,19 +10173,32 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); if ( p == src ) { return NUMBER_ERROR; } if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } // If the number is negative and valid, it must be a signed integer. - if(negative) { return number_type::signed_integer; } + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). + static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). // We want values larger or equal to 9223372036854775808 to be unsigned // integers, and the other values to be signed integers. - int digit_count = int(p - src); - if(digit_count >= 19) { - const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); - if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { - return number_type::unsigned_integer; - } + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; } return number_type::signed_integer; } @@ -9852,6 +10376,7 @@ inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; default: SIMDJSON_UNREACHABLE(); } return out; @@ -10078,7 +10603,7 @@ SIMDJSON_NO_SANITIZE_UNDEFINED // See issue https://github.com/simdjson/simdjson/issues/1965 SIMDJSON_NO_SANITIZE_MEMORY simdjson_inline int trailing_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long ret; // Search the mask data from least significant bit (LSB) // to the most significant bit (MSB) for a set bit (1). @@ -10094,9 +10619,15 @@ simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return input_num & (input_num-1); } +// We sometimes call leading_zeroes on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +// Applies only when SIMDJSON_PREFER_REVERSE_BITS is defined and true. +// (See below.) +SIMDJSON_NO_SANITIZE_UNDEFINED /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long leading_zero = 0; // Search the mask data from most significant bit (MSB) // to least significant bit (LSB) for a set bit (1). @@ -10149,7 +10680,7 @@ simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) #endif simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO *result = value1 + value2; return *result < value1; #else @@ -10224,10 +10755,10 @@ simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { #include -#if _M_ARM64 +#if SIMDJSON_REGULAR_VISUAL_STUDIO && SIMDJSON_IS_ARM64 // __umulh requires intrin.h #include -#endif // _M_ARM64 +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO && SIMDJSON_IS_ARM64 namespace simdjson { namespace arm64 { @@ -10247,13 +10778,13 @@ static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { internal::value128 answer; #if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 +#if SIMDJSON_IS_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); answer.low = value1 * value2; #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 +#endif // SIMDJSON_IS_ARM64 #else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); @@ -10266,7 +10797,13 @@ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t } // namespace arm64 } // namespace simdjson +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 +#else #define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif +#endif #endif // SIMDJSON_ARM64_NUMBERPARSING_DEFS_H /* end file simdjson/arm64/numberparsing_defs.h */ @@ -10286,7 +10823,7 @@ namespace arm64 { namespace { namespace simd { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO namespace { // Start of private section with Visual Studio workaround @@ -10395,7 +10932,7 @@ namespace { // We return uint32_t instead of uint16_t because that seems to be more efficient for most // purposes (cutting it down to uint16_t costs performance in some compilers). simdjson_inline uint32_t to_bitmask() const { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO const uint8x16_t bit_mask = simdjson_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); #else @@ -10408,7 +10945,7 @@ namespace { tmp = vpaddq_u8(tmp, tmp); return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); } - simdjson_inline bool any() const { return vmaxvq_u8(*this) != 0; } + simdjson_inline bool any() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } }; // Unsigned bytes @@ -10426,7 +10963,7 @@ namespace { // Splat constructor simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Member-by-member initialization -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 @@ -10520,7 +11057,7 @@ namespace { uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); // we increment by 0x08 the second half of the mask -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO uint8x16_t inc = simdjson_make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); #else uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; @@ -10550,7 +11087,7 @@ namespace { uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); // we increment by 0x08 the second half of the mask -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO uint8x8_t inc = simdjson_make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); #else uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; @@ -10602,7 +11139,7 @@ namespace { // Array constructor simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} // Member-by-member initialization -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 @@ -10723,7 +11260,7 @@ namespace { } simdjson_inline uint64_t to_bitmask() const { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +#if SIMDJSON_REGULAR_VISUAL_STUDIO const uint8x16_t bit_mask = simdjson_make_uint8x16_t( 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 @@ -12115,7 +12652,7 @@ simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { // up enough CPU: the second half of the functions is highly serial, only using 1 execution core // at a time. The second input's scans has some dependency on the first ones finishing it, but // they can make a lot of progress before they need that information. -// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that +// 3. Step 1 does not use enough capacity, so we run some extra stuff while we're waiting for that // to finish: utf-8 checks and generating the output from the last iteration. // // The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all @@ -12195,7 +12732,7 @@ simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementa } parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); /*** - * The On Demand API requires special padding. + * The On-Demand API requires special padding. * * This is related to https://github.com/simdjson/simdjson/issues/906 * Basically, we want to make sure that if the parsing continues beyond the last (valid) @@ -12980,8 +13517,8 @@ simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) constexpr uint32_t substitution_code_point = 0xfffd; // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); *src_ptr += 6; @@ -13038,8 +13575,8 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, // It is not ideal that this function is nearly identical to handle_unicode_codepoint. // // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); *src_ptr += 6; // If we found a high surrogate, we must @@ -13070,7 +13607,7 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ @@ -13676,6 +14213,7 @@ simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::docu return stage2::tape_builder::parse_document(*this, _doc); } +SIMDJSON_NO_SANITIZE_MEMORY simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept { return arm64::stringparsing::parse_string(src, dst, allow_replacement); } @@ -13707,204 +14245,277 @@ simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t * #endif // SIMDJSON_SRC_ARM64_CPP /* end file arm64.cpp */ #endif -#if SIMDJSON_IMPLEMENTATION_FALLBACK -/* including fallback.cpp: #include */ -/* begin file fallback.cpp */ -#ifndef SIMDJSON_SRC_FALLBACK_CPP -#define SIMDJSON_SRC_FALLBACK_CPP +#if SIMDJSON_IMPLEMENTATION_HASWELL +/* including haswell.cpp: #include */ +/* begin file haswell.cpp */ +#ifndef SIMDJSON_SRC_HASWELL_CPP +#define SIMDJSON_SRC_HASWELL_CPP /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -/* including simdjson/fallback.h: #include */ -/* begin file simdjson/fallback.h */ -#ifndef SIMDJSON_FALLBACK_H -#define SIMDJSON_FALLBACK_H +/* including simdjson/haswell.h: #include */ +/* begin file simdjson/haswell.h */ +#ifndef SIMDJSON_HASWELL_H +#define SIMDJSON_HASWELL_H -/* including simdjson/fallback/begin.h: #include "simdjson/fallback/begin.h" */ -/* begin file simdjson/fallback/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "fallback" */ -#define SIMDJSON_IMPLEMENTATION fallback -/* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */ -/* begin file simdjson/fallback/base.h */ -#ifndef SIMDJSON_FALLBACK_BASE_H -#define SIMDJSON_FALLBACK_BASE_H +/* including simdjson/haswell/begin.h: #include "simdjson/haswell/begin.h" */ +/* begin file simdjson/haswell/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "haswell" */ +#define SIMDJSON_IMPLEMENTATION haswell + +/* including simdjson/haswell/base.h: #include "simdjson/haswell/base.h" */ +/* begin file simdjson/haswell/base.h */ +#ifndef SIMDJSON_HASWELL_BASE_H +#define SIMDJSON_HASWELL_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL namespace simdjson { /** - * Fallback implementation (runs on any machine). + * Implementation for Haswell (Intel AVX2). */ -namespace fallback { +namespace haswell { class implementation; -} // namespace fallback +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace haswell } // namespace simdjson -#endif // SIMDJSON_FALLBACK_BASE_H -/* end file simdjson/fallback/base.h */ -/* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */ -/* begin file simdjson/fallback/bitmanipulation.h */ -#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H -#define SIMDJSON_FALLBACK_BITMANIPULATION_H +#endif // SIMDJSON_HASWELL_BASE_H +/* end file simdjson/haswell/base.h */ +/* including simdjson/haswell/intrinsics.h: #include "simdjson/haswell/intrinsics.h" */ +/* begin file simdjson/haswell/intrinsics.h */ +#ifndef SIMDJSON_HASWELL_INTRINSICS_H +#define SIMDJSON_HASWELL_INTRINSICS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); + +#endif // SIMDJSON_HASWELL_INTRINSICS_H +/* end file simdjson/haswell/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +// We enable bmi2 only if LLVM/clang is used, because GCC may not +// make good use of it. See https://github.com/simdjson/simdjson/pull/2243 +#if defined(__clang__) +SIMDJSON_TARGET_REGION("avx2,bmi,bmi2,pclmul,lzcnt,popcnt") +#else +SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") +#endif +#endif + +/* including simdjson/haswell/bitmanipulation.h: #include "simdjson/haswell/bitmanipulation.h" */ +/* begin file simdjson/haswell/bitmanipulation.h */ +#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H +#define SIMDJSON_HASWELL_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmask.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace haswell { namespace { -#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) -static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { - unsigned long x0 = (unsigned long)x, top, bottom; - _BitScanForward(&top, (unsigned long)(x >> 32)); - _BitScanForward(&bottom, x0); - *ret = x0 ? bottom : 32 + top; - return x != 0; +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO } -static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { - unsigned long x1 = (unsigned long)(x >> 32), top, bottom; - _BitScanReverse(&top, x1); - _BitScanReverse(&bottom, (unsigned long)x); - *ret = x1 ? top + 32 : bottom; - return x != 0; + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); } -#endif /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { -#ifdef _MSC_VER - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} #else - return __builtin_clzll(input_num); -#endif// _MSC_VER +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif } } // unnamed namespace -} // namespace fallback +} // namespace haswell } // namespace simdjson -#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H -/* end file simdjson/fallback/bitmanipulation.h */ -/* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */ -/* begin file simdjson/fallback/stringparsing_defs.h */ -#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H -#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +#endif // SIMDJSON_HASWELL_BITMANIPULATION_H +/* end file simdjson/haswell/bitmanipulation.h */ +/* including simdjson/haswell/bitmask.h: #include "simdjson/haswell/bitmask.h" */ +/* begin file simdjson/haswell/bitmask.h */ +#ifndef SIMDJSON_HASWELL_BITMASK_H +#define SIMDJSON_HASWELL_BITMASK_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace haswell { namespace { -// Holds backslashes and quotes locations. -struct backslash_and_quote { -public: - static constexpr uint32_t BYTES_PROCESSED = 1; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - - simdjson_inline bool has_quote_first() { return c == '"'; } - simdjson_inline bool has_backslash() { return c == '\\'; } - simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } - simdjson_inline int backslash_index() { return c == '\\' ? 0 : 1; } - - uint8_t c; -}; // struct backslash_and_quote - -simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // store to dest unconditionally - we can overwrite the bits we don't like later - dst[0] = src[0]; - return { src[0] }; +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); } } // unnamed namespace -} // namespace fallback +} // namespace haswell } // namespace simdjson -#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H -/* end file simdjson/fallback/stringparsing_defs.h */ -/* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */ -/* begin file simdjson/fallback/numberparsing_defs.h */ -#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H -#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +#endif // SIMDJSON_HASWELL_BITMASK_H +/* end file simdjson/haswell/bitmask.h */ +/* including simdjson/haswell/numberparsing_defs.h: #include "simdjson/haswell/numberparsing_defs.h" */ +/* begin file simdjson/haswell/numberparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +#define SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include - -#ifdef JSON_TEST_NUMBERS // for unit testing -void found_invalid_number(const uint8_t *buf); -void found_integer(int64_t result, const uint8_t *buf); -void found_unsigned_integer(uint64_t result, const uint8_t *buf); -void found_float(double result, const uint8_t *buf); -#endif +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace haswell { namespace numberparsing { -// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ -/** @private */ -static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) { - uint64_t val; - memcpy(&val, chars, sizeof(uint64_t)); - val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; - val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; - return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); -} - /** @private */ static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - return parse_eight_digits_unrolled(reinterpret_cast(chars)); -} - -#if SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest } -#endif /** @private */ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { internal::value128 answer; #if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 +#if SIMDJSON_IS_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); answer.low = value1 * value2; #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 +#endif // SIMDJSON_IS_ARM64 #else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); @@ -13914,412 +14525,843 @@ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t } } // namespace numberparsing -} // namespace fallback +} // namespace haswell } // namespace simdjson #define SIMDJSON_SWAR_NUMBER_PARSING 1 -#endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H -/* end file simdjson/fallback/numberparsing_defs.h */ -/* end file simdjson/fallback/begin.h */ -/* including simdjson/generic/amalgamated.h for fallback: #include "simdjson/generic/amalgamated.h" */ -/* begin file simdjson/generic/amalgamated.h for fallback */ -#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) -#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! -#endif - -/* including simdjson/generic/base.h for fallback: #include "simdjson/generic/base.h" */ -/* begin file simdjson/generic/base.h for fallback */ -#ifndef SIMDJSON_GENERIC_BASE_H +#endif // SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +/* end file simdjson/haswell/numberparsing_defs.h */ +/* including simdjson/haswell/simd.h: #include "simdjson/haswell/simd.h" */ +/* begin file simdjson/haswell/simd.h */ +#ifndef SIMDJSON_HASWELL_SIMD_H +#define SIMDJSON_HASWELL_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ -/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ -/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ -/* amalgamation skipped (editor-only): #else */ -/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ -/* amalgamation skipped (editor-only): #endif */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { - -struct open_container; -class dom_parser_implementation; +namespace haswell { +namespace { +namespace simd { -/** - * The type of a JSON number - */ -enum class number_type { - floating_point_number=1, /// a binary64 number - signed_integer, /// a signed integer that fits in a 64-bit word using two's complement - unsigned_integer /// a positive integer larger or equal to 1<<63 -}; + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m256i value; -} // namespace fallback -} // namespace simdjson + // Zero constructor + simdjson_inline base() : value{__m256i()} {} -#endif // SIMDJSON_GENERIC_BASE_H -/* end file simdjson/generic/base.h for fallback */ -/* including simdjson/generic/jsoncharutils.h for fallback: #include "simdjson/generic/jsoncharutils.h" */ -/* begin file simdjson/generic/jsoncharutils.h for fallback */ -#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } -namespace simdjson { -namespace fallback { -namespace { -namespace jsoncharutils { + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; -// return non-zero if not a structural or whitespace char -// zero otherwise -simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace_negated[c]; -} + // Forward-declared so they can be used by splat and friends. + template + struct simd8; -simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace[c]; -} + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; -// returns a value with the high 16 bits set if not valid -// otherwise returns the conversion of the 4 hex digits at src into the bottom -// 16 bits of the 32-bit return register -// -// see -// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ -static inline uint32_t hex_to_u32_nocheck( - const uint8_t *src) { // strictly speaking, static inline is a C-ism - uint32_t v1 = internal::digit_to_val32[630 + src[0]]; - uint32_t v2 = internal::digit_to_val32[420 + src[1]]; - uint32_t v3 = internal::digit_to_val32[210 + src[2]]; - uint32_t v4 = internal::digit_to_val32[0 + src[3]]; - return v1 | v2 | v3 | v4; -} + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} -// given a code point cp, writes to c -// the utf-8 code, outputting the length in -// bytes, if the length is zero, the code point -// is invalid -// -// This can possibly be made faster using pdep -// and clz and table lookups, but JSON documents -// have few escaped code points, and the following -// function looks cheap. -// -// Note: we assume that surrogates are treated separately -// -simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { - if (cp <= 0x7F) { - c[0] = uint8_t(cp); - return 1; // ascii - } - if (cp <= 0x7FF) { - c[0] = uint8_t((cp >> 6) + 192); - c[1] = uint8_t((cp & 63) + 128); - return 2; // universal plane - // Surrogates are treated elsewhere... - //} //else if (0xd800 <= cp && cp <= 0xdfff) { - // return 0; // surrogates // could put assert here - } else if (cp <= 0xFFFF) { - c[0] = uint8_t((cp >> 12) + 224); - c[1] = uint8_t(((cp >> 6) & 63) + 128); - c[2] = uint8_t((cp & 63) + 128); - return 3; - } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed - c[0] = uint8_t((cp >> 18) + 240); - c[1] = uint8_t(((cp >> 12) & 63) + 128); - c[2] = uint8_t(((cp >> 6) & 63) + 128); - c[3] = uint8_t((cp & 63) + 128); - return 4; - } - // will return 0 when the code point was too large. - return 0; // bad r -} + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } -#if SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; -} -#endif + static const int SIZE = sizeof(base::value); -} // namespace jsoncharutils -} // unnamed namespace -} // namespace fallback -} // namespace simdjson + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); + } + }; -#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H -/* end file simdjson/generic/jsoncharutils.h for fallback */ -/* including simdjson/generic/atomparsing.h for fallback: #include "simdjson/generic/atomparsing.h" */ -/* begin file simdjson/generic/atomparsing.h for fallback */ -#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} -#include + simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; -namespace simdjson { -namespace fallback { -namespace { -/// @private -namespace atomparsing { + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } + static simdjson_inline simd8 load(const T values[32]) { + return _mm256_loadu_si256(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. -// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot -// be certain that the character pointer will be properly aligned. -// You might think that using memcpy makes this function expensive, but you'd be wrong. -// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); -// to the compile-time constant 1936482662. -simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} + // Store to array + simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } -// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. -// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. -simdjson_warn_unused -simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { - uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) - static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); - std::memcpy(&srcval, src, sizeof(uint32_t)); - return srcval ^ string_to_uint32(atom); -} + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src) { - return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_true_atom(src); } - else if (len == 4) { return !str4ncmp(src, "true"); } - else { return false; } -} + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm256_shuffle_epi8(lookup_table, *this); + } -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src) { - return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; -} + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in four steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], + thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask and so forth + shufmask = + _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, + 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop3 = BitsSetTable256mul2[mask3]; -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { - if (len > 5) { return is_valid_false_atom(src); } - else if (len == 5) { return !str4ncmp(src+1, "alse"); } - else { return false; } -} + // then load the corresponding mask + // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. + __m256i v256 = _mm256_castsi128_si256( + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); + __m256i compactmask = _mm256_insertf128_si256(v256, + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); + __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); + // We just need to write out the result. + // This is the tricky bit that is hard to do + // if we want to return a SIMD register, since there + // is no single-instruction approach to recombine + // the two 128-bit lanes with an offset. + __m128i v128; + v128 = _mm256_castsi256_si128(almostthere); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); + v128 = _mm256_extractf128_si256(almostthere, 1); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); + } -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src) { - return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_null_atom(src); } - else if (len == 4) { return !str4ncmp(src, "null"); } - else { return false; } -} + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -} // namespace atomparsing -} // unnamed namespace -} // namespace fallback -} // namespace simdjson + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } + }; -#endif // SIMDJSON_GENERIC_ATOMPARSING_H -/* end file simdjson/generic/atomparsing.h for fallback */ -/* including simdjson/generic/dom_parser_implementation.h for fallback: #include "simdjson/generic/dom_parser_implementation.h" */ -/* begin file simdjson/generic/dom_parser_implementation.h for fallback */ -#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } -namespace simdjson { -namespace fallback { + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } -// expectation: sizeof(open_container) = 64/8. -struct open_container { - uint32_t tape_index; // where, on the tape, does the scope ([,{) begins - uint32_t count; // how many elements in the scope -}; // struct open_container + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } + }; -static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - /** Tape location of each open { or [ */ - std::unique_ptr open_containers{}; - /** Whether each open container is a [ or { */ - std::unique_ptr is_array{}; - /** Buffer passed to stage 1 */ - const uint8_t *buf{}; - /** Length passed to stage 1 */ - size_t len{0}; - /** Document passed to stage 2 */ - dom::document *doc{}; + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed - inline dom_parser_implementation() noexcept; - inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; - inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} - simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; - simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; - simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; - simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; - inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; - inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; -private: - simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + this->chunks[0].compress(mask1, output); + this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); + return 64 - count_ones(mask); + } -}; + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + } -} // namespace fallback -} // namespace simdjson + simdjson_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } -namespace simdjson { -namespace fallback { + simdjson_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } -inline dom_parser_implementation::dom_parser_implementation() noexcept = default; -inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; -inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask + ); + } -// Leaving these here so they can be inlined if so desired -inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } - // Stage 1 index output - size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; - structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); - if (!structural_indexes) { _capacity = 0; return MEMALLOC; } - structural_indexes[0] = 0; - n_structural_indexes = 0; + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } - _capacity = capacity; - return SUCCESS; -} + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } -inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - // Stage 2 stacks - open_containers.reset(new (std::nothrow) open_container[max_depth]); - is_array.reset(new (std::nothrow) bool[max_depth]); - if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 - _max_depth = max_depth; - return SUCCESS; -} +} // namespace simd -} // namespace fallback +} // unnamed namespace +} // namespace haswell } // namespace simdjson -#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H -/* end file simdjson/generic/dom_parser_implementation.h for fallback */ -/* including simdjson/generic/implementation_simdjson_result_base.h for fallback: #include "simdjson/generic/implementation_simdjson_result_base.h" */ -/* begin file simdjson/generic/implementation_simdjson_result_base.h for fallback */ -#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +#endif // SIMDJSON_HASWELL_SIMD_H +/* end file simdjson/haswell/simd.h */ +/* including simdjson/haswell/stringparsing_defs.h: #include "simdjson/haswell/stringparsing_defs.h" */ +/* begin file simdjson/haswell/stringparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +#define SIMDJSON_HASWELL_STRINGPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace haswell { +namespace { -// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair -// so we can avoid inlining errors -// TODO reconcile these! -/** - * The result of a simdjson operation that could fail. - * - * Gives the option of reading error codes, or throwing an exception by casting to the desired result. - * - * This is a base class for implementations that want to add functions to the result type for - * chaining. - * - * Override like: - * - * struct simdjson_result : public internal::implementation_simdjson_result_base { - * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} - * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} - * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} - * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} - * // Your extra methods here - * } - * - * Then any method returning simdjson_result will be chainable with your methods. - */ -template -struct implementation_simdjson_result_base { +using namespace simd; - /** - * Create a new empty result with error = UNINITIALIZED. - */ - simdjson_inline implementation_simdjson_result_base() noexcept = default; +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - /** - * Create a new error result. - */ - simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } - /** - * Create a new successful result. - */ - simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote - /** - * Create a new result with both things (use if you don't want to branch when creating the result). - */ - simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits + }; +} - /** - * Move the value and the error to the provided variables. - * - * @param value The variable to assign the value to. May not be set if there is an error. - * @param error The variable to assign the error to. Set to SUCCESS if there is no error. - */ +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +/* end file simdjson/haswell/stringparsing_defs.h */ +/* end file simdjson/haswell/begin.h */ +/* including simdjson/generic/amalgamated.h for haswell: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for haswell */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for haswell: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for haswell */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for haswell */ +/* including simdjson/generic/jsoncharutils.h for haswell: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for haswell */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for haswell */ +/* including simdjson/generic/atomparsing.h for haswell: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for haswell */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace haswell { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for haswell */ +/* including simdjson/generic/dom_parser_implementation.h for haswell: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for haswell */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace haswell +} // namespace simdjson + +namespace simdjson { +namespace haswell { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for haswell */ +/* including simdjson/generic/implementation_simdjson_result_base.h for haswell: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for haswell */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ simdjson_inline void tie(T &value, error_code &error) && noexcept; /** @@ -14388,13 +15430,13 @@ struct implementation_simdjson_result_base { error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ }; // struct implementation_simdjson_result_base -} // namespace fallback +} // namespace haswell } // namespace simdjson #endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H -/* end file simdjson/generic/implementation_simdjson_result_base.h for fallback */ -/* including simdjson/generic/numberparsing.h for fallback: #include "simdjson/generic/numberparsing.h" */ -/* begin file simdjson/generic/numberparsing.h for fallback */ +/* end file simdjson/generic/implementation_simdjson_result_base.h for haswell */ +/* including simdjson/generic/numberparsing.h for haswell: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for haswell */ #ifndef SIMDJSON_GENERIC_NUMBERPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -14409,7 +15451,7 @@ struct implementation_simdjson_result_base { #include namespace simdjson { -namespace fallback { +namespace haswell { namespace numberparsing { #ifdef JSON_TEST_NUMBERS @@ -14417,11 +15459,13 @@ namespace numberparsing { #define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) #else #define INVALID_NUMBER(SRC) (NUMBER_ERROR) #define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) #endif namespace { @@ -14532,7 +15576,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -14752,6 +15796,10 @@ simdjson_inline bool parse_digit(const uint8_t c, I &i) { return true; } +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer @@ -14809,7 +15857,7 @@ simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const s // If there were more than 18 digits, we may have overflowed the integer. We have to do // something!!!! if (simdjson_unlikely(p > start_exp+18)) { - // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow while (*start_exp == '0') { start_exp++; } // 19 digits could overflow int64_t and is kind of absurd anyway. We don't // support exponents smaller than -999,999,999,999,999,999 and bigger @@ -14831,6 +15879,23 @@ simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const s return SUCCESS; } +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. @@ -14905,6 +15970,18 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, return SUCCESS; } +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING @@ -14936,7 +16013,6 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // Our objective is accurate parsing (ULP of 0) at high speed. template simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { - // // Check for minus sign // @@ -14983,11 +16059,11 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. size_t longest_digit_count = negative ? 19 : 20; - if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } if (digit_count == longest_digit_count) { if (negative) { // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } WRITE_INTEGER(~i+1, src, writer); if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -15006,7 +16082,7 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } } - // Write unsigned if it doesn't fit in a signed integer. + // Write unsigned if it does not fit in a signed integer. if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { @@ -15457,19 +16533,32 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); if ( p == src ) { return NUMBER_ERROR; } if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } // If the number is negative and valid, it must be a signed integer. - if(negative) { return number_type::signed_integer; } + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). + static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). // We want values larger or equal to 9223372036854775808 to be unsigned // integers, and the other values to be signed integers. - int digit_count = int(p - src); - if(digit_count >= 19) { - const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); - if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { - return number_type::unsigned_integer; - } + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; } return number_type::signed_integer; } @@ -15647,19 +16736,20 @@ inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; default: SIMDJSON_UNREACHABLE(); } return out; } -} // namespace fallback +} // namespace haswell } // namespace simdjson #endif // SIMDJSON_GENERIC_NUMBERPARSING_H -/* end file simdjson/generic/numberparsing.h for fallback */ +/* end file simdjson/generic/numberparsing.h for haswell */ -/* including simdjson/generic/implementation_simdjson_result_base-inl.h for fallback: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for fallback */ +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for haswell: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -15669,7 +16759,7 @@ inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace haswell { // // internal::implementation_simdjson_result_base inline implementation @@ -15746,36 +16836,42 @@ template simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} -} // namespace fallback +} // namespace haswell } // namespace simdjson #endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H -/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for fallback */ -/* end file simdjson/generic/amalgamated.h for fallback */ -/* including simdjson/fallback/end.h: #include "simdjson/fallback/end.h" */ -/* begin file simdjson/fallback/end.h */ +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for haswell */ +/* end file simdjson/generic/amalgamated.h for haswell */ +/* including simdjson/haswell/end.h: #include "simdjson/haswell/end.h" */ +/* begin file simdjson/haswell/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -/* undefining SIMDJSON_IMPLEMENTATION from "fallback" */ +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "haswell" */ #undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/fallback/end.h */ +/* end file simdjson/haswell/end.h */ -#endif // SIMDJSON_FALLBACK_H -/* end file simdjson/fallback.h */ -/* including simdjson/fallback/implementation.h: #include */ -/* begin file simdjson/fallback/implementation.h */ -#ifndef SIMDJSON_FALLBACK_IMPLEMENTATION_H -#define SIMDJSON_FALLBACK_IMPLEMENTATION_H +#endif // SIMDJSON_HASWELL_H +/* end file simdjson/haswell.h */ +/* including simdjson/haswell/implementation.h: #include */ +/* begin file simdjson/haswell/implementation.h */ +#ifndef SIMDJSON_HASWELL_IMPLEMENTATION_H +#define SIMDJSON_HASWELL_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL namespace simdjson { -namespace fallback { +namespace haswell { /** * @private @@ -15783,208 +16879,281 @@ namespace fallback { class implementation final : public simdjson::implementation { public: simdjson_inline implementation() : simdjson::implementation( - "fallback", - "Generic fallback implementation", - 0 + "haswell", + "Intel/AMD AVX2", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 ) {} simdjson_warn_unused error_code create_dom_parser_implementation( size_t capacity, size_t max_length, - std::unique_ptr& dst + std::unique_ptr& dst ) const noexcept final; simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; }; -} // namespace fallback +} // namespace haswell } // namespace simdjson -#endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H -/* end file simdjson/fallback/implementation.h */ +#endif // SIMDJSON_HASWELL_IMPLEMENTATION_H +/* end file simdjson/haswell/implementation.h */ -/* including simdjson/fallback/begin.h: #include */ -/* begin file simdjson/fallback/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "fallback" */ -#define SIMDJSON_IMPLEMENTATION fallback -/* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */ -/* begin file simdjson/fallback/base.h */ -#ifndef SIMDJSON_FALLBACK_BASE_H -#define SIMDJSON_FALLBACK_BASE_H +/* including simdjson/haswell/begin.h: #include */ +/* begin file simdjson/haswell/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "haswell" */ +#define SIMDJSON_IMPLEMENTATION haswell + +/* including simdjson/haswell/base.h: #include "simdjson/haswell/base.h" */ +/* begin file simdjson/haswell/base.h */ +#ifndef SIMDJSON_HASWELL_BASE_H +#define SIMDJSON_HASWELL_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL namespace simdjson { /** - * Fallback implementation (runs on any machine). + * Implementation for Haswell (Intel AVX2). */ -namespace fallback { +namespace haswell { class implementation; -} // namespace fallback +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace haswell } // namespace simdjson -#endif // SIMDJSON_FALLBACK_BASE_H -/* end file simdjson/fallback/base.h */ -/* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */ -/* begin file simdjson/fallback/bitmanipulation.h */ -#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H -#define SIMDJSON_FALLBACK_BITMANIPULATION_H +#endif // SIMDJSON_HASWELL_BASE_H +/* end file simdjson/haswell/base.h */ +/* including simdjson/haswell/intrinsics.h: #include "simdjson/haswell/intrinsics.h" */ +/* begin file simdjson/haswell/intrinsics.h */ +#ifndef SIMDJSON_HASWELL_INTRINSICS_H +#define SIMDJSON_HASWELL_INTRINSICS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); + +#endif // SIMDJSON_HASWELL_INTRINSICS_H +/* end file simdjson/haswell/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +// We enable bmi2 only if LLVM/clang is used, because GCC may not +// make good use of it. See https://github.com/simdjson/simdjson/pull/2243 +#if defined(__clang__) +SIMDJSON_TARGET_REGION("avx2,bmi,bmi2,pclmul,lzcnt,popcnt") +#else +SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") +#endif +#endif + +/* including simdjson/haswell/bitmanipulation.h: #include "simdjson/haswell/bitmanipulation.h" */ +/* begin file simdjson/haswell/bitmanipulation.h */ +#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H +#define SIMDJSON_HASWELL_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmask.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace haswell { namespace { -#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) -static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { - unsigned long x0 = (unsigned long)x, top, bottom; - _BitScanForward(&top, (unsigned long)(x >> 32)); - _BitScanForward(&bottom, x0); - *ret = x0 ? bottom : 32 + top; - return x != 0; +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO } -static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { - unsigned long x1 = (unsigned long)(x >> 32), top, bottom; - _BitScanReverse(&top, x1); - _BitScanReverse(&bottom, (unsigned long)x); - *ret = x1 ? top + 32 : bottom; - return x != 0; + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); } -#endif /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { -#ifdef _MSC_VER - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} #else - return __builtin_clzll(input_num); -#endif// _MSC_VER +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif } } // unnamed namespace -} // namespace fallback +} // namespace haswell } // namespace simdjson -#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H -/* end file simdjson/fallback/bitmanipulation.h */ -/* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */ -/* begin file simdjson/fallback/stringparsing_defs.h */ -#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H -#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +#endif // SIMDJSON_HASWELL_BITMANIPULATION_H +/* end file simdjson/haswell/bitmanipulation.h */ +/* including simdjson/haswell/bitmask.h: #include "simdjson/haswell/bitmask.h" */ +/* begin file simdjson/haswell/bitmask.h */ +#ifndef SIMDJSON_HASWELL_BITMASK_H +#define SIMDJSON_HASWELL_BITMASK_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace haswell { namespace { -// Holds backslashes and quotes locations. -struct backslash_and_quote { -public: - static constexpr uint32_t BYTES_PROCESSED = 1; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - - simdjson_inline bool has_quote_first() { return c == '"'; } - simdjson_inline bool has_backslash() { return c == '\\'; } - simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } - simdjson_inline int backslash_index() { return c == '\\' ? 0 : 1; } - - uint8_t c; -}; // struct backslash_and_quote - -simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // store to dest unconditionally - we can overwrite the bits we don't like later - dst[0] = src[0]; - return { src[0] }; +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); } } // unnamed namespace -} // namespace fallback +} // namespace haswell } // namespace simdjson -#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H -/* end file simdjson/fallback/stringparsing_defs.h */ -/* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */ -/* begin file simdjson/fallback/numberparsing_defs.h */ -#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H -#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +#endif // SIMDJSON_HASWELL_BITMASK_H +/* end file simdjson/haswell/bitmask.h */ +/* including simdjson/haswell/numberparsing_defs.h: #include "simdjson/haswell/numberparsing_defs.h" */ +/* begin file simdjson/haswell/numberparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +#define SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include - -#ifdef JSON_TEST_NUMBERS // for unit testing -void found_invalid_number(const uint8_t *buf); -void found_integer(int64_t result, const uint8_t *buf); -void found_unsigned_integer(uint64_t result, const uint8_t *buf); -void found_float(double result, const uint8_t *buf); -#endif +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace haswell { namespace numberparsing { -// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ -/** @private */ -static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) { - uint64_t val; - memcpy(&val, chars, sizeof(uint64_t)); - val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; - val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; - return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); -} - /** @private */ static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - return parse_eight_digits_unrolled(reinterpret_cast(chars)); -} - -#if SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest } -#endif /** @private */ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { internal::value128 answer; #if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 +#if SIMDJSON_IS_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); answer.low = value1 * value2; #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 +#endif // SIMDJSON_IS_ARM64 #else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); @@ -15994,4165 +17163,3303 @@ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t } } // namespace numberparsing -} // namespace fallback +} // namespace haswell } // namespace simdjson #define SIMDJSON_SWAR_NUMBER_PARSING 1 -#endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H -/* end file simdjson/fallback/numberparsing_defs.h */ -/* end file simdjson/fallback/begin.h */ -/* including generic/stage1/find_next_document_index.h for fallback: #include */ -/* begin file generic/stage1/find_next_document_index.h for fallback */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +#endif // SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +/* end file simdjson/haswell/numberparsing_defs.h */ +/* including simdjson/haswell/simd.h: #include "simdjson/haswell/simd.h" */ +/* begin file simdjson/haswell/simd.h */ +#ifndef SIMDJSON_HASWELL_SIMD_H +#define SIMDJSON_HASWELL_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace haswell { namespace { -namespace stage1 { - -/** - * This algorithm is used to quickly identify the last structural position that - * makes up a complete document. - * - * It does this by going backwards and finding the last *document boundary* (a - * place where one value follows another without a comma between them). If the - * last document (the characters after the boundary) has an equal number of - * start and end brackets, it is considered complete. - * - * Simply put, we iterate over the structural characters, starting from - * the end. We consider that we found the end of a JSON document when the - * first element of the pair is NOT one of these characters: '{' '[' ':' ',' - * and when the second element is NOT one of these characters: '}' ']' ':' ','. - * - * This simple comparison works most of the time, but it does not cover cases - * where the batch's structural indexes contain a perfect amount of documents. - * In such a case, we do not have access to the structural index which follows - * the last document, therefore, we do not have access to the second element in - * the pair, and that means we cannot identify the last document. To fix this - * issue, we keep a count of the open and closed curly/square braces we found - * while searching for the pair. When we find a pair AND the count of open and - * closed curly/square braces is the same, we know that we just passed a - * complete document, therefore the last json buffer location is the end of the - * batch. - */ -simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { - // Variant: do not count separately, just figure out depth - if(parser.n_structural_indexes == 0) { return 0; } - auto arr_cnt = 0; - auto obj_cnt = 0; - for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { - auto idxb = parser.structural_indexes[i]; - switch (parser.buf[idxb]) { - case ':': - case ',': - continue; - case '}': - obj_cnt--; - continue; - case ']': - arr_cnt--; - continue; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - auto idxa = parser.structural_indexes[i - 1]; - switch (parser.buf[idxa]) { - case '{': - case '[': - case ':': - case ',': - continue; - } - // Last document is complete, so the next document will appear after! - if (!arr_cnt && !obj_cnt) { - return parser.n_structural_indexes; - } - // Last document is incomplete; mark the document at i + 1 as the next one - return i; - } - // If we made it to the end, we want to finish counting to see if we have a full document. - switch (parser.buf[parser.structural_indexes[0]]) { - case '}': - obj_cnt--; - break; - case ']': - arr_cnt--; - break; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - if (!arr_cnt && !obj_cnt) { - // We have a complete document. - return parser.n_structural_indexes; - } - return 0; -} +namespace simd { -} // namespace stage1 -} // unnamed namespace -} // namespace fallback -} // namespace simdjson + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m256i value; -#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H -/* end file generic/stage1/find_next_document_index.h for fallback */ -/* including generic/stage2/stringparsing.h for fallback: #include */ -/* begin file generic/stage2/stringparsing.h for fallback */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + // Zero constructor + simdjson_inline base() : value{__m256i()} {} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} -// This file contains the common code every implementation uses -// It is intended to be included multiple times and compiled multiple times + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } -namespace simdjson { -namespace fallback { -namespace { -/// @private -namespace stringparsing { + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Forward-declared so they can be used by splat and friends. + template + struct simd8; - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr, bool allow_replacement) { - // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) - constexpr uint32_t substitution_code_point = 0xfffd; - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; + static const int SIZE = sizeof(base::value); - // If we found a high surrogate, we must - // check for low surrogate for characters - // outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - const uint8_t *src_data = *src_ptr; - /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ - if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { - if(!allow_replacement) { return false; } - code_point = substitution_code_point; - } else { - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); + } + }; - // We have already checked that the high surrogate is valid and - // (code_point - 0xd800) < 1024. - // - // Check that code_point_2 is in the range 0xdc00..0xdfff - // and that code_point_2 was parsed from valid hex. - uint32_t low_bit = code_point_2 - 0xdc00; - if (low_bit >> 10) { - if(!allow_replacement) { return false; } - code_point = substitution_code_point; - } else { - code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; - *src_ptr += 6; - } + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } - } - } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { - // If we encounter a low surrogate (not preceded by a high surrogate) - // then we have an error. - if(!allow_replacement) { return false; } - code_point = substitution_code_point; - } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; -// handle a unicode codepoint using the wobbly convention -// https://simonsapin.github.io/wtf-8/ -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // It is not ideal that this function is nearly identical to handle_unicode_codepoint. - // - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // If we found a high surrogate, we must - // check for low surrogate for characters - // outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - const uint8_t *src_data = *src_ptr; - /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ - if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); - uint32_t low_bit = code_point_2 - 0xdc00; - if ((low_bit >> 10) == 0) { - code_point = - (((code_point - 0xd800) << 10) | low_bit) + 0x10000; - *src_ptr += 6; - } + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } + static simdjson_inline simd8 load(const T values[32]) { + return _mm256_loadu_si256(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); } - } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} + // Store to array + simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } -/** - * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There - * must be an unescaped quote terminating the string. It returns the final output - * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large - * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + - * SIMDJSON_PADDING bytes. - */ -simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); - } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. */ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm256_shuffle_epi8(lookup_table, *this); } - } -} -simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { - // It is not ideal that this function is nearly identical to parse_string. - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in four steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], + thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask and so forth + shufmask = + _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, + 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. + __m256i v256 = _mm256_castsi128_si256( + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); + __m256i compactmask = _mm256_insertf128_si256(v256, + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); + __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); + // We just need to write out the result. + // This is the tricky bit that is hard to do + // if we want to return a SIMD register, since there + // is no single-instruction approach to recombine + // the two 128-bit lanes with an offset. + __m128i v128; + v128 = _mm256_castsi256_si128(almostthere); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); + v128 = _mm256_extractf128_si256(almostthere, 1); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint_wobbly(&src, &dst)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. */ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); } - } -} + }; -} // namespace stringparsing -} // unnamed namespace -} // namespace fallback -} // namespace simdjson + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H -/* end file generic/stage2/stringparsing.h for fallback */ -/* including generic/stage2/logger.h for fallback: #include */ -/* begin file generic/stage2/logger.h for fallback */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } + }; -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -#include + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } -// This is for an internal-only stage 2 specific logger. -// Set LOG_ENABLED = true to log what stage 2 is doing! -namespace simdjson { -namespace fallback { -namespace { -namespace logger { + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } + }; - static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; -#if SIMDJSON_VERBOSE_LOGGING - static constexpr const bool LOG_ENABLED = true; -#else - static constexpr const bool LOG_ENABLED = false; -#endif - static constexpr const int LOG_EVENT_LEN = 20; - static constexpr const int LOG_BUFFER_LEN = 30; - static constexpr const int LOG_SMALL_BUFFER_LEN = 10; - static constexpr const int LOG_INDEX_LEN = 5; + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed - static int log_depth; // Not threadsafe. Log only. + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} - // Helper to turn unprintable or newline characters into spaces - static simdjson_inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + this->chunks[0].compress(mask1, output); + this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); + return 64 - count_ones(mask); } - } - // Print the header and set up log_start - static simdjson_inline void log_start() { - if (LOG_ENABLED) { - log_depth = 0; - printf("\n"); - printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); - printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); } - } - simdjson_unused static simdjson_inline void log_string(const char *message) { - if (LOG_ENABLED) { - printf("%s\n", message); + simdjson_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); } - } - // Logs a single line from the stage 2 DOM parser - template - static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { - if (LOG_ENABLED) { - printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); - auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; - auto next_index = structurals.next_structural; - auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); - auto next = &structurals.buf[*next_index]; - { - // Print the next N characters in the buffer. - printf("| "); - // Otherwise, print the characters starting from the buffer position. - // Print spaces for unprintable or newline characters. - for (int i=0;i reduce_or() const { + return this->chunks[0] | this->chunks[1]; } - } -} // namespace logger + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd + } // unnamed namespace -} // namespace fallback +} // namespace haswell } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H -/* end file generic/stage2/logger.h for fallback */ -/* including generic/stage2/json_iterator.h for fallback: #include */ -/* begin file generic/stage2/json_iterator.h for fallback */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +#endif // SIMDJSON_HASWELL_SIMD_H +/* end file simdjson/haswell/simd.h */ +/* including simdjson/haswell/stringparsing_defs.h: #include "simdjson/haswell/stringparsing_defs.h" */ +/* begin file simdjson/haswell/stringparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +#define SIMDJSON_HASWELL_STRINGPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace haswell { namespace { -namespace stage2 { -class json_iterator { -public: - const uint8_t* const buf; - uint32_t *next_structural; - dom_parser_implementation &dom_parser; - uint32_t depth{0}; - - /** - * Walk the JSON document. - * - * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as - * the first parameter; some callbacks have other parameters as well: - * - * - visit_document_start() - at the beginning. - * - visit_document_end() - at the end (if things were successful). - * - * - visit_array_start() - at the start `[` of a non-empty array. - * - visit_array_end() - at the end `]` of a non-empty array. - * - visit_empty_array() - when an empty array is encountered. - * - * - visit_object_end() - at the start `]` of a non-empty object. - * - visit_object_start() - at the end `]` of a non-empty object. - * - visit_empty_object() - when an empty object is encountered. - * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is - * guaranteed to point at the first quote of the string (`"key"`). - * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. - * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. - * - * - increment_count(iter) - each time a value is found in an array or object. - */ - template - simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; +using namespace simd; - /** - * Create an iterator capable of walking a JSON document. - * - * The document must have already passed through stage 1. - */ - simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - /** - * Look at the next token. - * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_inline const uint8_t *peek() const noexcept; - /** - * Advance to the next token. - * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_inline const uint8_t *advance() noexcept; - /** - * Get the remaining length of the document, from the start of the current token. - */ - simdjson_inline size_t remaining_len() const noexcept; - /** - * Check if we are at the end of the document. - * - * If this is true, there are no more tokens. - */ - simdjson_inline bool at_eof() const noexcept; - /** - * Check if we are at the beginning of the document. - */ - simdjson_inline bool at_beginning() const noexcept; - simdjson_inline uint8_t last_structural() const noexcept; + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } - /** - * Log that a value has been found. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_inline void log_value(const char *type) const noexcept; - /** - * Log the start of a multipart value. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_inline void log_start_value(const char *type) const noexcept; - /** - * Log the end of a multipart value. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_inline void log_end_value(const char *type) const noexcept; - /** - * Log an error. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_inline void log_error(const char *error) const noexcept; + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote - template - simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; - template - simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; -}; +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits + }; +} -template -simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { - logger::log_start(); +} // unnamed namespace +} // namespace haswell +} // namespace simdjson - // - // Start the document - // - if (at_eof()) { return EMPTY; } - log_start_value("document"); - SIMDJSON_TRY( visitor.visit_document_start(*this) ); +#endif // SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +/* end file simdjson/haswell/stringparsing_defs.h */ +/* end file simdjson/haswell/begin.h */ +/* including generic/amalgamated.h for haswell: #include */ +/* begin file generic/amalgamated.h for haswell */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) +#error generic/dependencies.h must be included before generic/amalgamated.h! +#endif - // - // Read first value - // - { - auto value = advance(); +/* including generic/base.h for haswell: #include */ +/* begin file generic/base.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_BASE_H - // Make sure the outer object or array is closed before continuing; otherwise, there are ways we - // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 - if (!STREAMING) { - switch (*value) { - case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; - case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; - } - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; - } - } - goto document_end; +namespace simdjson { +namespace haswell { +namespace { -// -// Object parser states -// -object_begin: - log_start_value("object"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = false; - SIMDJSON_TRY( visitor.visit_object_start(*this) ); +struct json_character_block; - { - auto key = advance(); - if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } - SIMDJSON_TRY( visitor.increment_count(*this) ); - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } +} // unnamed namespace +} // namespace haswell +} // namespace simdjson -object_field: - if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } +#endif // SIMDJSON_SRC_GENERIC_BASE_H +/* end file generic/base.h for haswell */ +/* including generic/dom_parser_implementation.h for haswell: #include */ +/* begin file generic/dom_parser_implementation.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H -object_continue: - switch (*advance()) { - case ',': - SIMDJSON_TRY( visitor.increment_count(*this) ); - { - auto key = advance(); - if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - goto object_field; - case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; - default: log_error("No comma between object fields"); return TAPE_ERROR; - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -scope_end: - depth--; - if (depth == 0) { goto document_end; } - if (dom_parser.is_array[depth]) { goto array_continue; } - goto object_continue; +// Interface a dom parser implementation must fulfill +namespace simdjson { +namespace haswell { +namespace { -// -// Array parser states -// -array_begin: - log_start_value("array"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = true; - SIMDJSON_TRY( visitor.visit_array_start(*this) ); - SIMDJSON_TRY( visitor.increment_count(*this) ); +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); +simdjson_inline bool is_ascii(const simd8x64& input); -array_value: - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } +} // unnamed namespace +} // namespace haswell +} // namespace simdjson -array_continue: - switch (*advance()) { - case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; - case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; - default: log_error("Missing comma between array values"); return TAPE_ERROR; - } +#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file generic/dom_parser_implementation.h for haswell */ +/* including generic/json_character_block.h for haswell: #include */ +/* begin file generic/json_character_block.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H -document_end: - log_end_value("document"); - SIMDJSON_TRY( visitor.visit_document_end(*this) ); +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); +namespace simdjson { +namespace haswell { +namespace { - // If we didn't make it to the end, it's an error - if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { - log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); - return TAPE_ERROR; - } +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); - return SUCCESS; + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } -} // walk_document() + uint64_t _whitespace; + uint64_t _op; +}; -simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) - : buf{_dom_parser.buf}, - next_structural{&_dom_parser.structural_indexes[start_structural_index]}, - dom_parser{_dom_parser} { -} +} // unnamed namespace +} // namespace haswell +} // namespace simdjson -simdjson_inline const uint8_t *json_iterator::peek() const noexcept { - return &buf[*(next_structural)]; -} -simdjson_inline const uint8_t *json_iterator::advance() noexcept { - return &buf[*(next_structural++)]; -} -simdjson_inline size_t json_iterator::remaining_len() const noexcept { - return dom_parser.len - *(next_structural-1); -} +#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +/* end file generic/json_character_block.h for haswell */ +/* end file generic/amalgamated.h for haswell */ +/* including generic/stage1/amalgamated.h for haswell: #include */ +/* begin file generic/stage1/amalgamated.h for haswell */ +// Stuff other things depend on +/* including generic/stage1/base.h for haswell: #include */ +/* begin file generic/stage1/base.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H -simdjson_inline bool json_iterator::at_eof() const noexcept { - return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; -} -simdjson_inline bool json_iterator::at_beginning() const noexcept { - return next_structural == dom_parser.structural_indexes.get(); -} -simdjson_inline uint8_t json_iterator::last_structural() const noexcept { - return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -simdjson_inline void json_iterator::log_value(const char *type) const noexcept { - logger::log_line(*this, "", type, ""); -} +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { -simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { - logger::log_line(*this, "+", type, ""); - if (logger::LOG_ENABLED) { logger::log_depth++; } -} +class bit_indexer; +template +struct buf_block_reader; +struct json_block; +class json_minifier; +class json_scanner; +struct json_string_block; +class json_string_scanner; +class json_structural_indexer; -simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { - if (logger::LOG_ENABLED) { logger::log_depth--; } - logger::log_line(*this, "-", type, ""); -} +} // namespace stage1 -simdjson_inline void json_iterator::log_error(const char *error) const noexcept { - logger::log_line(*this, "", "ERROR", error); -} +namespace utf8_validation { +struct utf8_checker; +} // namespace utf8_validation -template -simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { - switch (*value) { - case '"': return visitor.visit_root_string(*this, value); - case 't': return visitor.visit_root_true_atom(*this, value); - case 'f': return visitor.visit_root_false_atom(*this, value); - case 'n': return visitor.visit_root_null_atom(*this, value); - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return visitor.visit_root_number(*this, value); - default: - log_error("Document starts with a non-value character"); - return TAPE_ERROR; - } -} -template -simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { - // Use the fact that most scalars are going to be either strings or numbers. - if(*value == '"') { - return visitor.visit_string(*this, value); - } else if (((*value - '0') < 10) || (*value == '-')) { - return visitor.visit_number(*this, value); - } - // true, false, null are uncommon. - switch (*value) { - case 't': return visitor.visit_true_atom(*this, value); - case 'f': return visitor.visit_false_atom(*this, value); - case 'n': return visitor.visit_null_atom(*this, value); - default: - log_error("Non-value found when value was expected!"); - return TAPE_ERROR; - } -} +using utf8_validation::utf8_checker; -} // namespace stage2 } // unnamed namespace -} // namespace fallback +} // namespace haswell } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H -/* end file generic/stage2/json_iterator.h for fallback */ -/* including generic/stage2/tape_writer.h for fallback: #include */ -/* begin file generic/stage2/tape_writer.h for fallback */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H +/* end file generic/stage1/base.h for haswell */ +/* including generic/stage1/buf_block_reader.h for haswell: #include */ +/* begin file generic/stage1/buf_block_reader.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include namespace simdjson { -namespace fallback { +namespace haswell { namespace { -namespace stage2 { - -struct tape_writer { - /** The next place to write to tape */ - uint64_t *next_tape_loc; - - /** Write a signed 64-bit value to tape. */ - simdjson_inline void append_s64(int64_t value) noexcept; - - /** Write an unsigned 64-bit value to tape. */ - simdjson_inline void append_u64(uint64_t value) noexcept; - - /** Write a double value to tape. */ - simdjson_inline void append_double(double value) noexcept; - - /** - * Append a tape entry (an 8-bit type,and 56 bits worth of value). - */ - simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; +namespace stage1 { +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; /** - * Skip the current tape entry without writing. + * Get the last block, padded with spaces. * - * Used to skip the start of the container, since we'll come back later to fill it in when the - * container ends. - */ - simdjson_inline void skip() noexcept; - - /** - * Skip the number of tape entries necessary to write a large u64 or i64. - */ - simdjson_inline void skip_large_integer() noexcept; - - /** - * Skip the number of tape entries necessary to write a double. - */ - simdjson_inline void skip_double() noexcept; - - /** - * Write a value to a known location on tape. + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. * - * Used to go back and write out the start of a container after the container ends. + * @return the number of effective characters in the last block. */ - simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; - + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); private: - /** - * Append both the tape entry, and a supplementary value following it. Used for types that need - * all 64 bits, such as double and uint64_t. - */ - template - simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; -}; // struct tape_writer + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; -simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { - append2(0, value, internal::tape_type::INT64); +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; } -simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { - append(0, internal::tape_type::UINT64); - *next_tape_loc = value; - next_tape_loc++; +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; } -/** Write a double value to tape. */ -simdjson_inline void tape_writer::append_double(double value) noexcept { - append2(0, value, internal::tape_type::DOUBLE); +simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] <= ' ') { buf[i] = '_'; } + if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; } -simdjson_inline void tape_writer::skip() noexcept { - next_tape_loc++; +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; } -simdjson_inline void tape_writer::skip_large_integer() noexcept { - next_tape_loc += 2; -} +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} -simdjson_inline void tape_writer::skip_double() noexcept { - next_tape_loc += 2; +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; } -simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { - *next_tape_loc = val | ((uint64_t(char(t))) << 56); - next_tape_loc++; +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; } -template -simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { - append(val, t); - static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); - memcpy(next_tape_loc, &val2, sizeof(val2)); - next_tape_loc++; +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; } -simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { - tape_loc = val | ((uint64_t(char(t))) << 56); +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; } -} // namespace stage2 +} // namespace stage1 } // unnamed namespace -} // namespace fallback +} // namespace haswell } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H -/* end file generic/stage2/tape_writer.h for fallback */ -/* including generic/stage2/tape_builder.h for fallback: #include */ -/* begin file generic/stage2/tape_builder.h for fallback */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +/* end file generic/stage1/buf_block_reader.h for haswell */ +/* including generic/stage1/json_escape_scanner.h for haswell: #include */ +/* begin file generic/stage1/json_escape_scanner.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - namespace simdjson { -namespace fallback { +namespace haswell { namespace { -namespace stage2 { - -struct tape_builder { - template - simdjson_warn_unused static simdjson_inline error_code parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept; +namespace stage1 { - /** Called when a non-empty document starts. */ - simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; - /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; +/** + * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). + */ +struct json_escape_scanner { + /** The actual escape characters (the backslashes themselves). */ + uint64_t next_is_escaped = 0ULL; - /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; - /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; - /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + struct escaped_and_escape { + /** + * Mask of escaped characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 0100100010100101000 + * n \ \ n \ \ + * ``` + */ + uint64_t escaped; + /** + * Mask of escape characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 1001000101001010001 + * \ \ \ \ \ \ \ + * ``` + */ + uint64_t escape; + }; - /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; /** - * Called when a key in a field is encountered. + * Get a mask of both escape and escaped characters (the characters following a backslash). * - * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array - * will be called after this with the field value. + * @param potential_escape A mask of the character that can escape others (but could be + * escaped itself). e.g. block.eq('\\') */ - simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; - /** Called when a non-empty object ends. */ - simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; - /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { + +#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT + if (!backslash) { return {next_escaped_without_backslashes(), 0}; } +#endif + + // | | Mask (shows characters instead of 1's) | Depth | Instructions | + // |--------------------------------|----------------------------------------|-------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | + // | | ` even odd even odd odd` | | | + // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) + // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) + // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) + // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () + // (*) this is not needed until the next iteration + uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); + uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); + uint64_t escape = escape_and_terminal_code & backslash; + this->next_is_escaped = escape >> 63; + return {escaped, escape}; + } + +private: + static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; + + simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { + uint64_t escaped = this->next_is_escaped; + this->next_is_escaped = 0; + return escaped; + } /** - * Called when a string, number, boolean or null is found. - */ - simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; - /** - * Called when a string, number, boolean or null is found at the top level of a document (i.e. - * when there is no array or object and the entire document is a single string, number, boolean or - * null. + * Returns a mask of the next escape characters (masking out escaped backslashes), along with + * any non-backslash escape codes. * - * This is separate from primitive() because simdjson's normal primitive parsing routines assume - * there is at least one more token after the value, which is only true in an array or object. + * \n \\n \\\n \\\\n returns: + * \n \ \ \n \ \ + * 11 100 1011 10100 + * + * You are expected to mask out the first bit yourself if the previous block had a trailing + * escape. + * + * & the result with potential_escape to get just the escape characters. + * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. */ - simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; - - simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { + // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: + // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be + // inverted (\\\ would be 010 instead of 101). + // + // ``` + // string: | ____\\\\_\\\\_____ | + // maybe_escaped | ODD | \ \ \ \ | + // even-aligned ^^^ ^^^^ odd-aligned + // ``` + // + // Taking that into account, our basic strategy is: + // + // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for + // odd-aligned runs. + // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the + // odd bits in odd-aligned runs. + // 3. & with backslash to clean up any stray bits. + // runs are set to 0, and then XORing with "odd": + // + // | | Mask (shows characters instead of 1's) | Instructions | + // |--------------------------------|----------------------------------------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | + // | | ` even odd even odd odd` | + // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) + // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) + // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) + // - simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + // Escaped characters are characters following an escape. + uint64_t maybe_escaped = potential_escape << 1; - /** Called each time a new field or element in an array or object is found. */ - simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + // To distinguish odd from even escape sequences, therefore, we turn on any *starting* + // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) + // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. + // - Odd runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. + // - All other odd bytes are 1, and even bytes are 0. + uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; + uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; - /** Next location to write to tape */ - tape_writer tape; -private: - /** Next write location in the string buf for stage 2 parsing */ - uint8_t *current_string_buf_loc; + // Now we flip all odd bytes back with xor. This: + // - Makes odd runs of backslashes go from 0000 to 1010 + // - Makes even runs of backslashes go from 1111 to 1010 + // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) + // - Resets all other bytes to 0 + return even_series_codes_and_odd_bits ^ ODD_BITS; + } +}; - simdjson_inline tape_builder(dom::document &doc) noexcept; +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson - simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; - simdjson_inline void start_container(json_iterator &iter) noexcept; - simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; - simdjson_inline void on_end_string(uint8_t *dst) noexcept; -}; // struct tape_builder +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_escape_scanner.h for haswell */ +/* including generic/stage1/json_string_scanner.h for haswell: #include */ +/* begin file generic/stage1/json_string_scanner.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H -template -simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept { - dom_parser.doc = &doc; - json_iterator iter(dom_parser, STREAMING ? dom_parser.next_structural_index : 0); - tape_builder builder(doc); - return iter.walk_document(builder); -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_root_primitive(*this, value); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_primitive(*this, value); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : + _escaped(escaped), _quote(quote), _in_string(in_string) {} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { - constexpr uint32_t start_tape_index = 0; - tape.append(start_tape_index, internal::tape_type::ROOT); - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); - return SUCCESS; -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { - return visit_string(iter, key, true); -} + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } -simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 - return SUCCESS; -} + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-escaped ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; -simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { - iter.log_value(key ? "key" : "string"); - uint8_t *dst = on_start_string(iter); - dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. - if (dst == nullptr) { - iter.log_error("Invalid escape in string"); - return STRING_ERROR; - } - on_end_string(dst); - return SUCCESS; -} +private: + // Scans for escape characters + json_escape_scanner escape_scanner{}; + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; +}; -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { - return visit_string(iter, value); -} +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. +// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = escape_scanner.next(backslash).escaped; + const uint64_t quote = in.eq('"') & ~escaped; -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("number"); - return numberparsing::parse_number(value, tape); -} + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { // - // We need to make a copy to make sure that the string is space terminated. - // This is not about padding the input, which should already padded up - // to len + SIMDJSON_PADDING. However, we have no control at this stage - // on how the padding was done. What if the input string was padded with nulls? - // It is quite common for an input string to have an extra null character (C string). - // We do not want to allow 9\0 (where \0 is the null character) inside a JSON - // document, but the string "9\0" by itself is fine. So we make a copy and - // pad the input with spaces when we know that there is just one input element. - // This copy is relatively expensive, but it will almost never be called in - // practice unless you are in the strange scenario where you have many JSON - // documents made of single atoms. + // Check if we're still in a string at the end of the box so the next block will know // - std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); - if (copy.get() == nullptr) { return MEMALLOC; } - std::memcpy(copy.get(), value, iter.remaining_len()); - std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); - error_code error = visit_number(iter, copy.get()); - return error; -} - -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } - tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } - tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} - -// private: - -simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { - return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); -} + prev_in_string = uint64_t(static_cast(in_string) >> 63); -simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - auto start_index = next_tape_index(iter); - tape.append(start_index+2, start); - tape.append(start_index, end); - return SUCCESS; -} + // Use ^ to turn the beginning quote off, and the end quote on. -simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); - iter.dom_parser.open_containers[iter.depth].count = 0; - tape.skip(); // We don't actually *write* the start element until the end. + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block(escaped, quote, in_string); } -simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - // Write the ending tape element, pointing at the start location - const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; - tape.append(start_tape_index, end); - // Write the start tape element, pointing at the end location (and including count) - // count can overflow if it exceeds 24 bits... so we saturate - // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). - const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; - const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } return SUCCESS; } -simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { - // we advance the point, accounting for the fact that we have a NULL termination - tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); - return current_string_buf_loc + sizeof(uint32_t); -} - -simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { - uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); - // TODO check for overflow in case someone has a crazy string (>=4GB?) - // But only add the overflow check when the document itself exceeds 4GB - // Currently unneeded because we refuse to parse docs larger or equal to 4GB. - memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); - // NULL termination is still handy if you expect all your strings to - // be NULL terminated? It comes at a small cost - *dst = 0; - current_string_buf_loc = dst + 1; -} - -} // namespace stage2 +} // namespace stage1 } // unnamed namespace -} // namespace fallback +} // namespace haswell } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H -/* end file generic/stage2/tape_builder.h for fallback */ +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_string_scanner.h for haswell */ +/* including generic/stage1/utf8_lookup4_algorithm.h for haswell: #include */ +/* begin file generic/stage1/utf8_lookup4_algorithm.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H -// -// Stage 1 -// +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { - -simdjson_warn_unused error_code implementation::create_dom_parser_implementation( - size_t capacity, - size_t max_depth, - std::unique_ptr& dst -) const noexcept { - dst.reset( new (std::nothrow) fallback::dom_parser_implementation() ); - if (!dst) { return MEMALLOC; } - if (auto err = dst->set_capacity(capacity)) - return err; - if (auto err = dst->set_max_depth(max_depth)) - return err; - return SUCCESS; -} - +namespace haswell { namespace { -namespace stage1 { +namespace utf8_validation { -class structural_scanner { -public: +using namespace simd; -simdjson_inline structural_scanner(dom_parser_implementation &_parser, stage1_mode _partial) - : buf{_parser.buf}, - next_structural_index{_parser.structural_indexes.get()}, - parser{_parser}, - len{static_cast(_parser.len)}, - partial{_partial} { -} + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ -simdjson_inline void add_structural() { - *next_structural_index = idx; - next_structural_index++; -} + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, -simdjson_inline bool is_continuation(uint8_t c) { - return (c & 0xc0) == 0x80; -} + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, -simdjson_inline void validate_utf8_character() { - // Continuation - if (simdjson_unlikely((buf[idx] & 0x40) == 0)) { - // extra continuation - error = UTF8_ERROR; - idx++; - return; - } + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - // 2-byte - if ((buf[idx] & 0x20) == 0) { - // missing continuation - if (simdjson_unlikely(idx+1 > len || !is_continuation(buf[idx+1]))) { - if (idx+1 > len && is_streaming(partial)) { idx = len; return; } - error = UTF8_ERROR; - idx++; - return; - } - // overlong: 1100000_ 10______ - if (buf[idx] <= 0xc1) { error = UTF8_ERROR; } - idx += 2; - return; - } + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - // 3-byte - if ((buf[idx] & 0x10) == 0) { - // missing continuation - if (simdjson_unlikely(idx+2 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]))) { - if (idx+2 > len && is_streaming(partial)) { idx = len; return; } - error = UTF8_ERROR; - idx++; - return; - } - // overlong: 11100000 100_____ ________ - if (buf[idx] == 0xe0 && buf[idx+1] <= 0x9f) { error = UTF8_ERROR; } - // surrogates: U+D800-U+DFFF 11101101 101_____ - if (buf[idx] == 0xed && buf[idx+1] >= 0xa0) { error = UTF8_ERROR; } - idx += 3; - return; + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); } - - // 4-byte - // missing continuation - if (simdjson_unlikely(idx+3 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]) || !is_continuation(buf[idx+3]))) { - if (idx+2 > len && is_streaming(partial)) { idx = len; return; } - error = UTF8_ERROR; - idx++; - return; + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = must_be_2_3_continuation(prev2, prev3); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; } - // overlong: 11110000 1000____ ________ ________ - if (buf[idx] == 0xf0 && buf[idx+1] <= 0x8f) { error = UTF8_ERROR; } - // too large: > U+10FFFF: - // 11110100 (1001|101_)____ - // 1111(1___|011_|0101) 10______ - // also includes 5, 6, 7 and 8 byte characters: - // 11111___ - if (buf[idx] == 0xf4 && buf[idx+1] >= 0x90) { error = UTF8_ERROR; } - if (buf[idx] >= 0xf5) { error = UTF8_ERROR; } - idx += 4; -} -// Returns true if the string is unclosed. -simdjson_inline bool validate_string() { - idx++; // skip first quote - while (idx < len && buf[idx] != '"') { - if (buf[idx] == '\\') { - idx += 2; - } else if (simdjson_unlikely(buf[idx] & 0x80)) { - validate_utf8_character(); - } else { - if (buf[idx] < 0x20) { error = UNESCAPED_CHARS; } - idx++; - } + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); } - if (idx >= len) { return true; } - return false; -} -simdjson_inline bool is_whitespace_or_operator(uint8_t c) { - switch (c) { - case '{': case '}': case '[': case ']': case ',': case ':': - case ' ': case '\r': case '\n': case '\t': - return true; - default: - return false; - } -} + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; -// -// Parse the entire input in STEP_SIZE-byte chunks. -// -simdjson_inline error_code scan() { - bool unclosed_string = false; - for (;idx input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); } - } - // We pad beyond. - // https://github.com/simdjson/simdjson/issues/906 - // See json_structural_indexer.h for an explanation. - *next_structural_index = len; // assumed later in partial == stage1_mode::streaming_final - next_structural_index[1] = len; - next_structural_index[2] = 0; - parser.n_structural_indexes = uint32_t(next_structural_index - parser.structural_indexes.get()); - if (simdjson_unlikely(parser.n_structural_indexes == 0)) { return EMPTY; } - parser.next_structural_index = 0; - if (partial == stage1_mode::streaming_partial) { - if(unclosed_string) { - parser.n_structural_indexes--; - if (simdjson_unlikely(parser.n_structural_indexes == 0)) { return CAPACITY; } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. + simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; } - // We truncate the input to the end of the last complete document (or zero). - auto new_structural_indexes = find_next_document_index(parser); - if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { - if(parser.structural_indexes[0] == 0) { - // If the buffer is partial and we started at index 0 but the document is - // incomplete, it's too big to parse. - return CAPACITY; + + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; } else { - // It is possible that the document could be parsed, we just had a lot - // of white space. - parser.n_structural_indexes = 0; - return EMPTY; + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; } } - parser.n_structural_indexes = new_structural_indexes; - } else if(partial == stage1_mode::streaming_final) { - if(unclosed_string) { parser.n_structural_indexes--; } - // We truncate the input to the end of the last complete document (or zero). - // Because partial == stage1_mode::streaming_final, it means that we may - // silently ignore trailing garbage. Though it sounds bad, we do it - // deliberately because many people who have streams of JSON documents - // will truncate them for processing. E.g., imagine that you are uncompressing - // the data from a size file or receiving it in chunks from the network. You - // may not know where exactly the last document will be. Meanwhile the - // document_stream instances allow people to know the JSON documents they are - // parsing (see the iterator.source() method). - parser.n_structural_indexes = find_next_document_index(parser); - // We store the initial n_structural_indexes so that the client can see - // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, - // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, - // otherwise, it will copy some prior index. - parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; - // This next line is critical, do not change it unless you understand what you are - // doing. - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); - if (parser.n_structural_indexes == 0) { return EMPTY; } - } else if(unclosed_string) { error = UNCLOSED_STRING; } - return error; -} + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; + } -private: - const uint8_t *buf; - uint32_t *next_structural_index; - dom_parser_implementation &parser; - uint32_t len; - uint32_t idx{0}; - error_code error{SUCCESS}; - stage1_mode partial; -}; // structural_scanner + }; // struct utf8_checker +} // namespace utf8_validation -} // namespace stage1 } // unnamed namespace +} // namespace haswell +} // namespace simdjson -simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode partial) noexcept { - this->buf = _buf; - this->len = _len; - stage1::structural_scanner scanner(*this, partial); - return scanner.scan(); -} +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +/* end file generic/stage1/utf8_lookup4_algorithm.h for haswell */ +/* including generic/stage1/json_scanner.h for haswell: #include */ +/* begin file generic/stage1/json_scanner.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H -// big table for the minifier -static uint8_t jump_table[256 * 3] = { - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, - 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, -}; +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { - size_t i = 0, pos = 0; - uint8_t quote = 0; - uint8_t nonescape = 1; +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { - while (i < len) { - unsigned char c = buf[i]; - uint8_t *meta = jump_table + 3 * c; +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. + */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - quote = quote ^ (meta[0] & nonescape); - dst[pos] = c; - pos += meta[2] | quote; + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } - i += 1; - nonescape = uint8_t(~nonescape) | (meta[1]); - } - dst_len = pos; // we intentionally do not work with a reference - // for fear of aliasing - return quote ? UNCLOSED_STRING : SUCCESS; -} + // Helpers -// credit: based on code from Google Fuchsia (Apache Licensed) -simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { - const uint8_t *data = reinterpret_cast(buf); - uint64_t pos = 0; - uint32_t code_point = 0; - while (pos < len) { - // check of the next 8 bytes are ascii. - uint64_t next_pos = pos + 16; - if (next_pos <= len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v1; - memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - pos = next_pos; - continue; - } - } - unsigned char byte = data[pos]; - if (byte < 0x80) { - pos++; - continue; - } else if ((byte & 0xe0) == 0xc0) { - next_pos = pos + 2; - if (next_pos > len) { return false; } - if ((data[pos + 1] & 0xc0) != 0x80) { return false; } - // range check - code_point = (byte & 0x1f) << 6 | (data[pos + 1] & 0x3f); - if (code_point < 0x80 || 0x7ff < code_point) { return false; } - } else if ((byte & 0xf0) == 0xe0) { - next_pos = pos + 3; - if (next_pos > len) { return false; } - if ((data[pos + 1] & 0xc0) != 0x80) { return false; } - if ((data[pos + 2] & 0xc0) != 0x80) { return false; } - // range check - code_point = (byte & 0x0f) << 12 | - (data[pos + 1] & 0x3f) << 6 | - (data[pos + 2] & 0x3f); - if (code_point < 0x800 || 0xffff < code_point || - (0xd7ff < code_point && code_point < 0xe000)) { - return false; - } - } else if ((byte & 0xf8) == 0xf0) { // 0b11110000 - next_pos = pos + 4; - if (next_pos > len) { return false; } - if ((data[pos + 1] & 0xc0) != 0x80) { return false; } - if ((data[pos + 2] & 0xc0) != 0x80) { return false; } - if ((data[pos + 3] & 0xc0) != 0x80) { return false; } - // range check - code_point = - (byte & 0x07) << 18 | (data[pos + 1] & 0x3f) << 12 | - (data[pos + 2] & 0x3f) << 6 | (data[pos + 3] & 0x3f); - if (code_point <= 0xffff || 0x10ffff < code_point) { return false; } - } else { - // we may have a continuation - return false; - } - pos = next_pos; - } - return true; -} + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } -} // namespace fallback -} // namespace simdjson + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) -// -// Stage 2 -// + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. + */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; -namespace simdjson { -namespace fallback { +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); -simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; -simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} -simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { - return fallback::stringparsing::parse_string(src, dst, replacement_char); +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; } -simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { - return fallback::stringparsing::parse_wobbly_string(src, dst); +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. + characters, + follows_nonquote_scalar + ); } -simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { - auto error = stage1(_buf, _len, stage1_mode::regular); - if (error) { return error; } - return stage2(_doc); +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); } -} // namespace fallback +} // namespace stage1 +} // unnamed namespace +} // namespace haswell } // namespace simdjson -/* including simdjson/fallback/end.h: #include */ -/* begin file simdjson/fallback/end.h */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -/* undefining SIMDJSON_IMPLEMENTATION from "fallback" */ -#undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/fallback/end.h */ - -#endif // SIMDJSON_SRC_FALLBACK_CPP -/* end file fallback.cpp */ -#endif -#if SIMDJSON_IMPLEMENTATION_HASWELL -/* including haswell.cpp: #include */ -/* begin file haswell.cpp */ -#ifndef SIMDJSON_SRC_HASWELL_CPP -#define SIMDJSON_SRC_HASWELL_CPP - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -/* including simdjson/haswell.h: #include */ -/* begin file simdjson/haswell.h */ -#ifndef SIMDJSON_HASWELL_H -#define SIMDJSON_HASWELL_H - -/* including simdjson/haswell/begin.h: #include "simdjson/haswell/begin.h" */ -/* begin file simdjson/haswell/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "haswell" */ -#define SIMDJSON_IMPLEMENTATION haswell +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +/* end file generic/stage1/json_scanner.h for haswell */ -/* including simdjson/haswell/base.h: #include "simdjson/haswell/base.h" */ -/* begin file simdjson/haswell/base.h */ -#ifndef SIMDJSON_HASWELL_BASE_H -#define SIMDJSON_HASWELL_BASE_H +// All other declarations +/* including generic/stage1/find_next_document_index.h for haswell: #include */ +/* begin file generic/stage1/find_next_document_index.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL namespace simdjson { -/** - * Implementation for Haswell (Intel AVX2). - */ namespace haswell { +namespace { +namespace stage1 { -class implementation; +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} -namespace { -namespace simd { -template struct simd8; -template struct simd8x64; -} // namespace simd +} // namespace stage1 } // unnamed namespace - } // namespace haswell } // namespace simdjson -#endif // SIMDJSON_HASWELL_BASE_H -/* end file simdjson/haswell/base.h */ -/* including simdjson/haswell/intrinsics.h: #include "simdjson/haswell/intrinsics.h" */ -/* begin file simdjson/haswell/intrinsics.h */ -#ifndef SIMDJSON_HASWELL_INTRINSICS_H -#define SIMDJSON_HASWELL_INTRINSICS_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for haswell */ +/* including generic/stage1/json_minifier.h for haswell: #include */ +/* begin file generic/stage1/json_minifier.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#if SIMDJSON_VISUAL_STUDIO -// under clang within visual studio, this will include -#include // visual studio or clang -#else -#include // elsewhere -#endif // SIMDJSON_VISUAL_STUDIO - -#if SIMDJSON_CLANG_VISUAL_STUDIO -/** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - * e.g., if __AVX2__ is set... in turn, we normally set these - * macros by compiling against the corresponding architecture - * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole - * software with these advanced instructions. In simdjson, we - * want to compile the whole program for a generic target, - * and only target our specific kernels. As a workaround, - * we directly include the needed headers. These headers would - * normally guard against such usage, but we carefully included - * (or ) before, so the headers - * are fooled. - */ -#include // for _blsr_u64 -#include // for __lzcnt64 -#include // for most things (AVX2, AVX512, _popcnt64) -#include -#include -#include -#include -#include // for _mm_clmulepi64_si128 -// unfortunately, we may not get _blsr_u64, but, thankfully, clang -// has it as a macro. -#ifndef _blsr_u64 -// we roll our own -#define _blsr_u64(n) ((n - 1) & n) -#endif // _blsr_u64 -#endif // SIMDJSON_CLANG_VISUAL_STUDIO - -static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); - -#endif // SIMDJSON_HASWELL_INTRINSICS_H -/* end file simdjson/haswell/intrinsics.h */ - -#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL -SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") -#endif - -/* including simdjson/haswell/bitmanipulation.h: #include "simdjson/haswell/bitmanipulation.h" */ -/* begin file simdjson/haswell/bitmanipulation.h */ -#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H -#define SIMDJSON_HASWELL_BITMANIPULATION_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmask.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) namespace simdjson { namespace haswell { namespace { +namespace stage1 { -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -SIMDJSON_NO_SANITIZE_UNDEFINED -// This function can be used safely even if not all bytes have been -// initialized. -// See issue https://github.com/simdjson/simdjson/issues/1965 -SIMDJSON_NO_SANITIZE_MEMORY -simdjson_inline int trailing_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - return (int)_tzcnt_u64(input_num); -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - //////// - // You might expect the next line to be equivalent to - // return (int)_tzcnt_u64(input_num); - // but the generated code differs and might be less efficient? - //////// - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO -} +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; -/* result might be undefined when input_num is zero */ -simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return _blsr_u64(input_num); -} +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; +}; -/* result might be undefined when input_num is zero */ -simdjson_inline int leading_zeroes(uint64_t input_num) { - return int(_lzcnt_u64(input_num)); +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); } -#if SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows in this kernel - return __popcnt64(input_num);// Visual Studio wants two underscores -} -#else -simdjson_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; } -#endif -simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); -#else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); -#endif +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); } -} // unnamed namespace -} // namespace haswell -} // namespace simdjson - -#endif // SIMDJSON_HASWELL_BITMANIPULATION_H -/* end file simdjson/haswell/bitmanipulation.h */ -/* including simdjson/haswell/bitmask.h: #include "simdjson/haswell/bitmask.h" */ -/* begin file simdjson/haswell/bitmask.h */ -#ifndef SIMDJSON_HASWELL_BITMASK_H -#define SIMDJSON_HASWELL_BITMASK_H +template<> +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); -namespace simdjson { -namespace haswell { -namespace { + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } -// -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. -// -// For example, prefix_xor(00100100) == 00011100 -// -simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { - // There should be no such thing with a processor supporting avx2 - // but not clmul. - __m128i all_ones = _mm_set1_epi8('\xFF'); - __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); - return _mm_cvtsi128_si64(result); + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); } +} // namespace stage1 } // unnamed namespace } // namespace haswell } // namespace simdjson -#endif // SIMDJSON_HASWELL_BITMASK_H -/* end file simdjson/haswell/bitmask.h */ -/* including simdjson/haswell/numberparsing_defs.h: #include "simdjson/haswell/numberparsing_defs.h" */ -/* begin file simdjson/haswell/numberparsing_defs.h */ -#ifndef SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H -#define SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +/* end file generic/stage1/json_minifier.h for haswell */ +/* including generic/stage1/json_structural_indexer.h for haswell: #include */ +/* begin file generic/stage1/json_structural_indexer.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) namespace simdjson { namespace haswell { -namespace numberparsing { - -/** @private */ -static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - // this actually computes *16* values so we are being wasteful. - const __m128i ascii0 = _mm_set1_epi8('0'); - const __m128i mul_1_10 = - _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); - const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); - const __m128i mul_1_10000 = - _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); - const __m128i input = _mm_sub_epi8( - _mm_loadu_si128(reinterpret_cast(chars)), ascii0); - const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); - const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); - const __m128i t3 = _mm_packus_epi32(t2, t2); - const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); - return _mm_cvtsi128_si32( - t4); // only captures the sum of the first 8 digits, drop the rest -} +namespace { +namespace stage1 { -/** @private */ -simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { - internal::value128 answer; -#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); -#endif - return answer; -} +class bit_indexer { +public: + uint32_t *tail; -} // namespace numberparsing -} // namespace haswell -} // namespace simdjson + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} -#define SIMDJSON_SWAR_NUMBER_PARSING 1 +#if SIMDJSON_PREFER_REVERSE_BITS + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. + * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + simdjson_inline void write_index(uint32_t idx, uint64_t& rev_bits, int i) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } +#else + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ -#endif // SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H -/* end file simdjson/haswell/numberparsing_defs.h */ -/* including simdjson/haswell/simd.h: #include "simdjson/haswell/simd.h" */ -/* begin file simdjson/haswell/simd.h */ -#ifndef SIMDJSON_HASWELL_SIMD_H -#define SIMDJSON_HASWELL_SIMD_H + simdjson_inline void write_index(uint32_t idx, uint64_t& bits, int i) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } +#endif // SIMDJSON_PREFER_REVERSE_BITS -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + template + simdjson_inline int write_indexes(uint32_t idx, uint64_t& bits) { + write_index(idx, bits, START); + SIMDJSON_IF_CONSTEXPR (N > 1) { + write_indexes<(N-1>0?START+1:START), (N-1>=0?N-1:1)>(idx, bits); + } + return START+N; + } -namespace simdjson { -namespace haswell { -namespace { -namespace simd { + template + simdjson_inline int write_indexes_stepped(uint32_t idx, uint64_t& bits, int cnt) { + write_indexes(idx, bits); + SIMDJSON_IF_CONSTEXPR ((START+STEP) < END) { + if (simdjson_unlikely((START+STEP) < cnt)) { + write_indexes_stepped<(START+STEP(idx, bits, cnt); + } + } + return ((END-START) % STEP) == 0 ? END : (END-START) - ((END-START) % STEP) + STEP; + } - // Forward-declared so they can be used by splat and friends. - template - struct base { - __m256i value; + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it + // will provide its own version of the code. +#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; - // Zero constructor - simdjson_inline base() : value{__m256i()} {} + int cnt = static_cast(count_ones(bits)); - // Conversion from SIMD register - simdjson_inline base(const __m256i _value) : value(_value) {} +#if SIMDJSON_PREFER_REVERSE_BITS + bits = reverse_bits(bits); +#endif +#ifdef SIMDJSON_STRUCTURAL_INDEXER_STEP + static constexpr const int STEP = SIMDJSON_STRUCTURAL_INDEXER_STEP; +#else + static constexpr const int STEP = 4; +#endif + static constexpr const int STEP_UNTIL = 24; - // Conversion to SIMD register - simdjson_inline operator const __m256i&() const { return this->value; } - simdjson_inline operator __m256i&() { return this->value; } + write_indexes_stepped<0, STEP_UNTIL, STEP>(idx, bits, cnt); + SIMDJSON_IF_CONSTEXPR (STEP_UNTIL < 64) { + if (simdjson_unlikely(STEP_UNTIL < cnt)) { + for (int i=STEP_UNTIL; i(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } - }; + this->tail += cnt; + } +#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER - // Forward-declared so they can be used by splat and friends. - template - struct simd8; +}; - template> - struct base8: base> { - typedef uint32_t bitmask_t; - typedef uint64_t bitmask2_t; +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. + */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; - simdjson_inline base8() : base>() {} - simdjson_inline base8(const __m256i _value) : base>(_value) {} +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); - friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; - static const int SIZE = sizeof(base::value); +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} - template - simdjson_inline simd8 prev(const simd8 prev_chunk) const { - return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; } - }; - - // SIMD byte mask type (returned by things like eq and gt) - template<> - struct simd8: base8 { - static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m256i _value) : base8(_value) {} - // Splat constructor - simdjson_inline simd8(bool _value) : base8(splat(_value)) {} +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 does not use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); - simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } - simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } - simdjson_inline simd8 operator~() const { return *this ^ true; } - }; + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) + uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} - template - struct base8_numeric: base8 { - static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } - static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } - static simdjson_inline simd8 load(const T values[32]) { - return _mm256_loadu_si256(reinterpret_cast(values)); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_inline simd8 repeat_16( - T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} - simdjson_inline base8_numeric() : base8() {} - simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} - // Store to array - simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION + checker.check_next_input(in); +#endif + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} - // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } - simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? + ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } - // Override to distinguish from bool version - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) - template - simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm256_shuffle_epi8(lookup_table, *this); - } - - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). - // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes - // get written. - // Design consideration: it seems like a function with the - // signature simd8 compress(uint32_t mask) would be - // sensible, but the AVX ISA makes this kind of approach difficult. - template - simdjson_inline void compress(uint32_t mask, L * output) const { - using internal::thintable_epi8; - using internal::BitsSetTable256mul2; - using internal::pshufb_combine_table; - // this particular implementation was inspired by work done by @animetosho - // we do it in four steps, first 8 bytes and then second 8 bytes... - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits - uint8_t mask3 = uint8_t(mask >> 16); // ... - uint8_t mask4 = uint8_t(mask >> 24); // ... - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], - thintable_epi8[mask2], thintable_epi8[mask1]); - // we increment by 0x08 the second half of the mask and so forth - shufmask = - _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, - 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); - // this is the version "nearly pruned" - __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); - // we still need to put the pieces back together. - // we compute the popcount of the first words: - int pop1 = BitsSetTable256mul2[mask1]; - int pop3 = BitsSetTable256mul2[mask3]; - - // then load the corresponding mask - // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. - __m256i v256 = _mm256_castsi128_si256( - _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); - __m256i compactmask = _mm256_insertf128_si256(v256, - _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); - __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); - // We just need to write out the result. - // This is the tricky bit that is hard to do - // if we want to return a SIMD register, since there - // is no single-instruction approach to recombine - // the two 128-bit lanes with an offset. - __m128i v128; - v128 = _mm256_castsi256_si128(almostthere); - _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); - v128 = _mm256_extractf128_si256(almostthere, 1); - _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); - } - - template - simdjson_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, replace15 - )); - } - }; - - // Signed bytes - template<> - struct simd8 : base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, - int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, - int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 - ) : simd8(_mm256_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v16,v17,v18,v19,v20,v21,v22,v23, - v24,v25,v26,v27,v28,v29,v30,v31 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } - - // Order-sensitive comparisons - simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } - simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } - simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } - }; - - // Unsigned bytes - template<> - struct simd8: base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, - uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, - uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 - ) : simd8(_mm256_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v16,v17,v18,v19,v20,v21,v22,v23, - v24,v25,v26,v27,v28,v29,v30,v31 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } - - // Saturated math - simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } - simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } - - // Order-specific operations - simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } - - // Bit-specific operations - simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } - simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } - simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } - simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } - simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } - template - simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } - template - simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } - // Get one of the bits and make a bitmask out of it. - // e.g. value.get_bit<7>() gets the high bit - template - simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } - }; - - template - struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64& o) = delete; // no copy allowed - simd8x64& operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} - simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} - - simdjson_inline uint64_t compress(uint64_t mask, T * output) const { - uint32_t mask1 = uint32_t(mask); - uint32_t mask2 = uint32_t(mask >> 32); - this->chunks[0].compress(mask1, output); - this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); - return 64 - count_ones(mask); - } - - simdjson_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr+sizeof(simd8)*0); - this->chunks[1].store(ptr+sizeof(simd8)*1); - } - - simdjson_inline uint64_t to_bitmask() const { - uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r_hi = this->chunks[1].to_bitmask(); - return r_lo | (r_hi << 32); - } - - simdjson_inline simd8 reduce_or() const { - return this->chunks[0] | this->chunks[1]; - } - - simdjson_inline simd8x64 bit_or(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] | mask, - this->chunks[1] | mask - ); - } - - simdjson_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] == mask, - this->chunks[1] == mask - ).to_bitmask(); + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On-Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. + * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } } - - simdjson_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64( - this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1] - ).to_bitmask(); + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } } - simdjson_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] <= mask, - this->chunks[1] <= mask - ).to_bitmask(); + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; } - }; // struct simd8x64 - -} // namespace simd + } + checker.check_eof(); + return checker.errors(); +} +} // namespace stage1 } // unnamed namespace } // namespace haswell } // namespace simdjson -#endif // SIMDJSON_HASWELL_SIMD_H -/* end file simdjson/haswell/simd.h */ -/* including simdjson/haswell/stringparsing_defs.h: #include "simdjson/haswell/stringparsing_defs.h" */ -/* begin file simdjson/haswell/stringparsing_defs.h */ -#ifndef SIMDJSON_HASWELL_STRINGPARSING_DEFS_H -#define SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. +#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +/* end file generic/stage1/json_structural_indexer.h for haswell */ +/* including generic/stage1/utf8_validator.h for haswell: #include */ +/* begin file generic/stage1/utf8_validator.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/simd.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace { +namespace stage1 { -using namespace simd; - -// Holds backslashes and quotes locations. -struct backslash_and_quote { -public: - static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - - simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } - simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } - - uint32_t bs_bits; - uint32_t quote_bits; -}; // struct backslash_and_quote +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} -simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // this can read up to 15 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); - simd8 v(src); - // store to dest unconditionally - we can overwrite the bits we don't like later - v.store(dst); - return { - static_cast((v == '\\').to_bitmask()), // bs_bits - static_cast((v == '"').to_bitmask()), // quote_bits - }; +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); } +} // namespace stage1 } // unnamed namespace } // namespace haswell } // namespace simdjson -#endif // SIMDJSON_HASWELL_STRINGPARSING_DEFS_H -/* end file simdjson/haswell/stringparsing_defs.h */ -/* end file simdjson/haswell/begin.h */ -/* including simdjson/generic/amalgamated.h for haswell: #include "simdjson/generic/amalgamated.h" */ -/* begin file simdjson/generic/amalgamated.h for haswell */ -#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) -#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! -#endif - -/* including simdjson/generic/base.h for haswell: #include "simdjson/generic/base.h" */ -/* begin file simdjson/generic/base.h for haswell */ -#ifndef SIMDJSON_GENERIC_BASE_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +/* end file generic/stage1/utf8_validator.h for haswell */ +/* end file generic/stage1/amalgamated.h for haswell */ +/* including generic/stage2/amalgamated.h for haswell: #include */ +/* begin file generic/stage2/amalgamated.h for haswell */ +// Stuff other things depend on +/* including generic/stage2/base.h for haswell: #include */ +/* begin file generic/stage2/base.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ -/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ -/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ -/* amalgamation skipped (editor-only): #else */ -/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ -/* amalgamation skipped (editor-only): #endif */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { +namespace { +namespace stage2 { -struct open_container; -class dom_parser_implementation; - -/** - * The type of a JSON number - */ -enum class number_type { - floating_point_number=1, /// a binary64 number - signed_integer, /// a signed integer that fits in a 64-bit word using two's complement - unsigned_integer /// a positive integer larger or equal to 1<<63 -}; +class json_iterator; +class structural_iterator; +struct tape_builder; +struct tape_writer; +} // namespace stage2 +} // unnamed namespace } // namespace haswell } // namespace simdjson -#endif // SIMDJSON_GENERIC_BASE_H -/* end file simdjson/generic/base.h for haswell */ -/* including simdjson/generic/jsoncharutils.h for haswell: #include "simdjson/generic/jsoncharutils.h" */ -/* begin file simdjson/generic/jsoncharutils.h for haswell */ -#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H +#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +/* end file generic/stage2/base.h for haswell */ +/* including generic/stage2/tape_writer.h for haswell: #include */ +/* begin file generic/stage2/tape_writer.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include + namespace simdjson { namespace haswell { namespace { -namespace jsoncharutils { +namespace stage2 { -// return non-zero if not a structural or whitespace char -// zero otherwise -simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace_negated[c]; -} +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; -simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace[c]; -} + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; -// returns a value with the high 16 bits set if not valid -// otherwise returns the conversion of the 4 hex digits at src into the bottom -// 16 bits of the 32-bit return register -// -// see -// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ -static inline uint32_t hex_to_u32_nocheck( - const uint8_t *src) { // strictly speaking, static inline is a C-ism - uint32_t v1 = internal::digit_to_val32[630 + src[0]]; - uint32_t v2 = internal::digit_to_val32[420 + src[1]]; - uint32_t v3 = internal::digit_to_val32[210 + src[2]]; - uint32_t v4 = internal::digit_to_val32[0 + src[3]]; - return v1 | v2 | v3 | v4; -} + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; -// given a code point cp, writes to c -// the utf-8 code, outputting the length in -// bytes, if the length is zero, the code point -// is invalid -// -// This can possibly be made faster using pdep -// and clz and table lookups, but JSON documents -// have few escaped code points, and the following -// function looks cheap. -// -// Note: we assume that surrogates are treated separately -// -simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { - if (cp <= 0x7F) { - c[0] = uint8_t(cp); - return 1; // ascii - } - if (cp <= 0x7FF) { - c[0] = uint8_t((cp >> 6) + 192); - c[1] = uint8_t((cp & 63) + 128); - return 2; // universal plane - // Surrogates are treated elsewhere... - //} //else if (0xd800 <= cp && cp <= 0xdfff) { - // return 0; // surrogates // could put assert here - } else if (cp <= 0xFFFF) { - c[0] = uint8_t((cp >> 12) + 224); - c[1] = uint8_t(((cp >> 6) & 63) + 128); - c[2] = uint8_t((cp & 63) + 128); - return 3; - } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed - c[0] = uint8_t((cp >> 18) + 240); - c[1] = uint8_t(((cp >> 12) & 63) + 128); - c[2] = uint8_t(((cp >> 6) & 63) + 128); - c[3] = uint8_t((cp & 63) + 128); - return 4; - } - // will return 0 when the code point was too large. - return 0; // bad r -} + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; -#if SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; -} -#endif + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; -} // namespace jsoncharutils -} // unnamed namespace -} // namespace haswell -} // namespace simdjson + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; -#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H -/* end file simdjson/generic/jsoncharutils.h for haswell */ -/* including simdjson/generic/atomparsing.h for haswell: #include "simdjson/generic/atomparsing.h" */ -/* begin file simdjson/generic/atomparsing.h for haswell */ -#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; -#include + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; -namespace simdjson { -namespace haswell { -namespace { -/// @private -namespace atomparsing { +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer -// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. -// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot -// be certain that the character pointer will be properly aligned. -// You might think that using memcpy makes this function expensive, but you'd be wrong. -// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); -// to the compile-time constant 1936482662. -simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} -// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. -// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. -simdjson_warn_unused -simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { - uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) - static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); - std::memcpy(&srcval, src, sizeof(uint32_t)); - return srcval ^ string_to_uint32(atom); +/** Write a double value to tape. */ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); } -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src) { - return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; } -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_true_atom(src); } - else if (len == 4) { return !str4ncmp(src, "true"); } - else { return false; } +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; } -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src) { - return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; } -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { - if (len > 5) { return is_valid_false_atom(src); } - else if (len == 5) { return !str4ncmp(src+1, "alse"); } - else { return false; } +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; } -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src) { - return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; } -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_null_atom(src); } - else if (len == 4) { return !str4ncmp(src, "null"); } - else { return false; } +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); } -} // namespace atomparsing +} // namespace stage2 } // unnamed namespace } // namespace haswell } // namespace simdjson -#endif // SIMDJSON_GENERIC_ATOMPARSING_H -/* end file simdjson/generic/atomparsing.h for haswell */ -/* including simdjson/generic/dom_parser_implementation.h for haswell: #include "simdjson/generic/dom_parser_implementation.h" */ -/* begin file simdjson/generic/dom_parser_implementation.h for haswell */ -#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for haswell */ +/* including generic/stage2/logger.h for haswell: #include */ +/* begin file generic/stage2/logger.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -namespace simdjson { -namespace haswell { - -// expectation: sizeof(open_container) = 64/8. -struct open_container { - uint32_t tape_index; // where, on the tape, does the scope ([,{) begins - uint32_t count; // how many elements in the scope -}; // struct open_container - -static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); - -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - /** Tape location of each open { or [ */ - std::unique_ptr open_containers{}; - /** Whether each open container is a [ or { */ - std::unique_ptr is_array{}; - /** Buffer passed to stage 1 */ - const uint8_t *buf{}; - /** Length passed to stage 1 */ - size_t len{0}; - /** Document passed to stage 2 */ - dom::document *doc{}; - - inline dom_parser_implementation() noexcept; - inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; - inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; - - simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; - simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; - simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; - simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; - inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; - inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; -private: - simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); - -}; +#include -} // namespace haswell -} // namespace simdjson +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! namespace simdjson { namespace haswell { +namespace { +namespace logger { -inline dom_parser_implementation::dom_parser_implementation() noexcept = default; -inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; -inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; -// Leaving these here so they can be inlined if so desired -inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } - // Stage 1 index output - size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; - structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); - if (!structural_indexes) { _capacity = 0; return MEMALLOC; } - structural_indexes[0] = 0; - n_structural_indexes = 0; +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; - _capacity = capacity; - return SUCCESS; -} + static int log_depth; // Not threadsafe. Log only. -inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - // Stage 2 stacks - open_containers.reset(new (std::nothrow) open_container[max_depth]); - is_array.reset(new (std::nothrow) bool[max_depth]); - if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } - _max_depth = max_depth; - return SUCCESS; -} + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i */ +/* begin file generic/stage2/json_iterator.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { +namespace { +namespace stage2 { -// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair -// so we can avoid inlining errors -// TODO reconcile these! -/** - * The result of a simdjson operation that could fail. - * - * Gives the option of reading error codes, or throwing an exception by casting to the desired result. - * - * This is a base class for implementations that want to add functions to the result type for - * chaining. - * - * Override like: - * - * struct simdjson_result : public internal::implementation_simdjson_result_base { - * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} - * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} - * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} - * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} - * // Your extra methods here - * } - * - * Then any method returning simdjson_result will be chainable with your methods. - */ -template -struct implementation_simdjson_result_base { +class json_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + uint32_t depth{0}; /** - * Create a new empty result with error = UNINITIALIZED. + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_end() - at the start `]` of a non-empty object. + * - visit_object_start() - at the end `]` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. + * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. */ - simdjson_inline implementation_simdjson_result_base() noexcept = default; + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; /** - * Create a new error result. + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. */ - simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); /** - * Create a new successful result. + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; - + simdjson_inline const uint8_t *peek() const noexcept; /** - * Create a new result with both things (use if you don't want to branch when creating the result). + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; - + simdjson_inline const uint8_t *advance() noexcept; /** - * Move the value and the error to the provided variables. - * - * @param value The variable to assign the value to. May not be set if there is an error. - * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + * Get the remaining length of the document, from the start of the current token. */ - simdjson_inline void tie(T &value, error_code &error) && noexcept; - + simdjson_inline size_t remaining_len() const noexcept; /** - * Move the value to the provided variable. + * Check if we are at the end of the document. * - * @param value The variable to assign the value to. May not be set if there is an error. + * If this is true, there are no more tokens. */ - simdjson_inline error_code get(T &value) && noexcept; - + simdjson_inline bool at_eof() const noexcept; /** - * The error. + * Check if we are at the beginning of the document. */ - simdjson_inline error_code error() const noexcept; - -#if SIMDJSON_EXCEPTIONS + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; /** - * Get the result value. + * Log that a value has been found. * - * @throw simdjson_error if there was an error. + * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_inline T& value() & noexcept(false); - + simdjson_inline void log_value(const char *type) const noexcept; /** - * Take the result value (move it). + * Log the start of a multipart value. * - * @throw simdjson_error if there was an error. + * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_inline T&& value() && noexcept(false); - + simdjson_inline void log_start_value(const char *type) const noexcept; /** - * Take the result value (move it). + * Log the end of a multipart value. * - * @throw simdjson_error if there was an error. + * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_inline T&& take_value() && noexcept(false); - + simdjson_inline void log_end_value(const char *type) const noexcept; /** - * Cast to the value (will throw on error). + * Log an error. * - * @throw simdjson_error if there was an error. + * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_inline operator T&&() && noexcept(false); + simdjson_inline void log_error(const char *error) const noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; -#endif // SIMDJSON_EXCEPTIONS +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline const T& value_unsafe() const& noexcept; - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline T& value_unsafe() & noexcept; - /** - * Take the result value (move it). This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline T&& value_unsafe() && noexcept; -protected: - /** users should never directly access first and second. **/ - T first{}; /** Users should never directly access 'first'. **/ - error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ -}; // struct implementation_simdjson_result_base - -} // namespace haswell -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H -/* end file simdjson/generic/implementation_simdjson_result_base.h for haswell */ -/* including simdjson/generic/numberparsing.h for haswell: #include "simdjson/generic/numberparsing.h" */ -/* begin file simdjson/generic/numberparsing.h for haswell */ -#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); -#include -#include -#include + // + // Read first value + // + { + auto value = advance(); -namespace simdjson { -namespace haswell { -namespace numberparsing { + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } -#ifdef JSON_TEST_NUMBERS -#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) -#else -#define INVALID_NUMBER(SRC) (NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) -#endif + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; -namespace { +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); -// Convert a mantissa, an exponent and a sign bit into an ieee64 double. -// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). -// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. -simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { - double d; - mantissa &= ~(1ULL << 52); - mantissa |= real_exponent << 52; - mantissa |= ((static_cast(negative)) << 63); - std::memcpy(&d, &mantissa, sizeof(d)); - return d; -} + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } -// Attempts to compute i * 10^(power) exactly; and if "negative" is -// true, negate the result. -// This function will only work in some cases, when it does not work, success is -// set to false. This should work *most of the time* (like 99% of the time). -// We assume that power is in the [smallest_power, -// largest_power] interval: the caller is responsible for this check. -simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { - // we start with a fast path - // It was described in - // Clinger WD. How to read floating point numbers accurately. - // ACM SIGPLAN Notices. 1990 -#ifndef FLT_EVAL_METHOD -#error "FLT_EVAL_METHOD should be defined, please include cfloat." -#endif -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - // We cannot be certain that x/y is rounded to nearest. - if (0 <= power && power <= 22 && i <= 9007199254740991) -#else - if (-22 <= power && power <= 22 && i <= 9007199254740991) -#endif +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } { - // convert the integer into a double. This is lossless since - // 0 <= i <= 2^53 - 1. - d = double(i); - // - // The general idea is as follows. - // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then - // 1) Both s and p can be represented exactly as 64-bit floating-point - // values - // (binary64). - // 2) Because s and p can be represented exactly as floating-point values, - // then s * p - // and s / p will produce correctly rounded values. - // - if (power < 0) { - d = d / simdjson::internal::power_of_ten[-power]; - } else { - d = d * simdjson::internal::power_of_ten[power]; - } - if (negative) { - d = -d; + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; } - return true; } - // When 22 < power && power < 22 + 16, we could - // hope for another, secondary fast path. It was - // described by David M. Gay in "Correctly rounded - // binary-decimal and decimal-binary conversions." (1990) - // If you need to compute i * 10^(22 + x) for x < 16, - // first compute i * 10^x, if you know that result is exact - // (e.g., when i * 10^x < 2^53), - // then you can still proceed and do (i * 10^x) * 10^22. - // Is this worth your time? - // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) - // for this second fast path to work. - // If you you have 22 < power *and* power < 22 + 16, and then you - // optimistically compute "i * 10^(x-22)", there is still a chance that you - // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of - // this optimization maybe less common than we would like. Source: - // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ - // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html - - // The fast path has now failed, so we are failing back on the slower path. - // In the slow path, we need to adjust i so that it is > 1<<63 which is always - // possible, except if i == 0, so we handle i == 0 separately. - if(i == 0) { - d = negative ? -0.0 : 0.0; - return true; +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; } +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; - // The exponent is 1024 + 63 + power - // + floor(log(5**power)/log(2)). - // The 1024 comes from the ieee64 standard. - // The 63 comes from the fact that we use a 64-bit word. - // - // Computing floor(log(5**power)/log(2)) could be - // slow. Instead we use a fast function. - // - // For power in (-400,350), we have that - // (((152170 + 65536) * power ) >> 16); - // is equal to - // floor(log(5**power)/log(2)) + power when power >= 0 - // and it is equal to - // ceil(log(5**-power)/log(2)) + power when power < 0 - // - // The 65536 is (1<<16) and corresponds to - // (65536 * power) >> 16 ---> power - // - // ((152170 * power ) >> 16) is equal to - // floor(log(5**power)/log(2)) - // - // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). - // The 1<<16 value is a power of two; we could use a - // larger power of 2 if we wanted to. - // - int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } - // We want the most significant bit of i to be 1. Shift if needed. - int lz = leading_zeroes(i); - i <<= lz; +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); - // We are going to need to do some 64-bit arithmetic to get a precise product. - // We use a table lookup approach. - // It is safe because - // power >= smallest_power - // and power <= largest_power - // We recover the mantissa of the power, it has a leading 1. It is always - // rounded down. - // - // We want the most significant 64 bits of the product. We know - // this will be non-zero because the most significant bit of i is - // 1. - const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); - // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); - // Both i and power_of_five_128[index] have their most significant bit set to 1 which - // implies that the either the most or the second most significant bit of the product - // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason about the product: there - // is 0 or 1 leading zero in the product. + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); - // Unless the least significant 9 bits of the high (64-bit) part of the full - // product are all 1s, then we know that the most significant 55 bits are - // exact and no further work is needed. Having 55 bits is necessary because - // we need 53 bits for the mantissa but we have to have one rounding bit and - // we can waste a bit if the most significant bit of the product is zero. - if((firstproduct.high & 0x1FF) == 0x1FF) { - // We want to compute i * 5^q, but only care about the top 55 bits at most. - // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing - // the full computation is wasteful. So we do what is called a "truncated - // multiplication". - // We take the most significant 64-bits, and we put them in - // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q - // to the desired approximation using one multiplication. Sometimes it does not suffice. - // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and - // then we get a better approximation to i * 5^q. - // - // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat - // more complicated. - // - // There is an extra layer of complexity in that we need more than 55 bits of - // accuracy in the round-to-even scenario. - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); - firstproduct.low += secondproduct.high; - if(secondproduct.high > firstproduct.low) { firstproduct.high++; } - // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without - // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product - // is sufficiently accurate, and more computation is not needed. + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; } - uint64_t lower = firstproduct.low; - uint64_t upper = firstproduct.high; - // The final mantissa should be 53 bits with a leading 1. - // We shift it so that it occupies 54 bits with a leading 1. - /////// - uint64_t upperbit = upper >> 63; - uint64_t mantissa = upper >> (upperbit + 9); - lz += int(1 ^ upperbit); - // Here we have mantissa < (1<<54). - int64_t real_exponent = exponent - lz; - if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 - if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = negative ? -0.0 : 0.0; - return true; - } - // next line is safe because -real_exponent + 1 < 0 - mantissa >>= -real_exponent + 1; - // Thankfully, we can't have both "round-to-even" and subnormals because - // "round-to-even" only occurs for powers close to 0. - mantissa += (mantissa & 1); // round up - mantissa >>= 1; - // There is a weird scenario where we don't have a subnormal but just. - // Suppose we start with 2.2250738585072013e-308, we end up - // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal - // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round - // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer - // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. - real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; - d = to_double(mantissa, real_exponent, negative); - return true; - } - // We have to round to even. The "to even" part - // is only a problem when we are right in between two floats - // which we guard against. - // If we have lots of trailing zeros, we may fall right between two - // floating-point values. - // - // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] - // times a power of two. That is, it is right between a number with binary significand - // m and another number with binary significand m+1; and it must be the case - // that it cannot be represented by a float itself. - // - // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. - // Recall that 10^q = 5^q * 2^q. - // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that - // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. - // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so - // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have - // 2^{53} x 5^{-q} < 2^{64}. - // Hence we have 5^{-q} < 2^{11}$ or q>= -4. - // - // We require lower <= 1 and not lower == 0 because we could not prove that - // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. - if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { - if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { - mantissa &= ~1; // flip it so that we do not round up - } - } + return SUCCESS; - mantissa += mantissa & 1; - mantissa >>= 1; +} // walk_document() - // Here we have mantissa < (1<<53), unless there was an overflow - if (mantissa >= (1ULL << 53)) { - ////////// - // This will happen when parsing values such as 7.2057594037927933e+16 - //////// - mantissa = (1ULL << 52); - real_exponent++; - } - mantissa &= ~(1ULL << 52); - // we have to check that real_exponent is in range, otherwise we bail out - if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinite value!!! We could actually throw an error here if we could. - return false; - } - d = to_double(mantissa, real_exponent, negative); - return true; +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { } -// We call a fallback floating-point parser that might be slow. Note -// it will accept JSON numbers, but the JSON spec. is more restrictive so -// before you call parse_float_fallback, you need to have validated the input -// string with the JSON grammar. -// It will return an error (false) if the parsed number is infinite. -// The string parsing itself always succeeds. We know that there is at least -// one digit. -static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); - // We do not accept infinite values. - - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; } - -static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); - // We do not accept infinite values. - - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); } -// check quickly whether the next 8 chars are made of digits -// at a glance, it looks better than Mula's -// http://0x80.pl/articles/swar-digits-validate.html -simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { - uint64_t val; - // this can read up to 7 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); - std::memcpy(&val, chars, 8); - // a branchy method might be faster: - // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) - // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == - // 0x3030303030303030); - return (((val & 0xF0F0F0F0F0F0F0F0) | - (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == - 0x3333333333333333); +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; } -template -SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_inline bool parse_digit(const uint8_t c, I &i) { - const uint8_t digit = static_cast(c - '0'); - if (digit > 9) { - return false; - } - // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication - i = 10 * i + digit; // might overflow, we will handle the overflow later - return true; +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); } -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. - const uint8_t *const first_after_period = p; +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} -#ifdef SIMDJSON_SWAR_NUMBER_PARSING -#if SIMDJSON_SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif // SIMDJSON_SWAR_NUMBER_PARSING -#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING - // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) - if (parse_digit(*p, i)) { ++p; } - while (parse_digit(*p, i)) { p++; } - exponent = first_after_period - p; - // Decimal without digits (123.) is illegal - if (exponent == 0) { - return INVALID_NUMBER(src); - } - return SUCCESS; +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); } -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { - // Exp Sign: -123.456e[-]78 - bool neg_exp = ('-' == *p); - if (neg_exp || '+' == *p) { p++; } // Skip + as well +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} - // Exponent: -123.456e-[78] - auto start_exp = p; - int64_t exp_number = 0; - while (parse_digit(*p, exp_number)) { ++p; } - // It is possible for parse_digit to overflow. - // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. - // Thus we *must* check for possible overflow before we negate exp_number. - - // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into - // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may - // not oblige and may, in fact, generate two distinct paths in any case. It might be - // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off - // instructions for a simdjson_likely branch, an unconclusive gain. - - // If there were no digits, it's an error. - if (simdjson_unlikely(p == start_exp)) { - return INVALID_NUMBER(src); - } - // We have a valid positive exponent in exp_number at this point, except that - // it may have overflowed. - - // If there were more than 18 digits, we may have overflowed the integer. We have to do - // something!!!! - if (simdjson_unlikely(p > start_exp+18)) { - // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow - while (*start_exp == '0') { start_exp++; } - // 19 digits could overflow int64_t and is kind of absurd anyway. We don't - // support exponents smaller than -999,999,999,999,999,999 and bigger - // than 999,999,999,999,999,999. - // We can truncate. - // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before - // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could - // truncate at 324. - // Note that there is no reason to fail per se at this point in time. - // E.g., 0e999999999999999999999 is a fine number. - if (p > start_exp+18) { exp_number = 999999999999999999; } +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; } - // At this point, we know that exp_number is a sane, positive, signed integer. - // It is <= 999,999,999,999,999,999. As long as 'exponent' is in - // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' - // is bounded in magnitude by the size of the JSON input, we are fine in this universe. - // To sum it up: the next line should never overflow. - exponent += (neg_exp ? -exp_number : exp_number); - return SUCCESS; } - -simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { ++start; } - // we over-decrement by one when there is a '.' - return digit_count - size_t(start - start_digits); +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + // Use the fact that most scalars are going to be either strings or numbers. + if(*value == '"') { + return visitor.visit_string(*this, value); + } else if (((*value - '0') < 10) || (*value == '-')) { + return visitor.visit_number(*this, value); + } + // true, false, null are uncommon. + switch (*value) { + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } } +} // namespace stage2 } // unnamed namespace +} // namespace haswell +} // namespace simdjson -/** @private */ -static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { - if (parse_float_fallback(src, answer)) { - return SUCCESS; +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for haswell */ +/* including generic/stage2/stringparsing.h for haswell: #include */ +/* begin file generic/stage2/stringparsing.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace haswell { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + if(!allow_replacement) { return false; } + code_point = substitution_code_point; } - return INVALID_NUMBER(src); + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; } -/** @private */ -template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon in practice. + +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. // - // 9999999999999999999 < 2**64 so we can accommodate 19 digits. - // If we have a decimal separator, then digit_count - 1 is the number of digits, but we - // may not have a decimal separator! - if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { - // Ok, chances are good that we had an overflow! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer - // reference to it, it would force it to be stored in memory, preventing the compiler from - // picking it apart and putting into registers. i.e. if we pass it as reference, - // it gets slow. - double d; - error_code error = slow_float_parsing(src, &d); - writer.append_double(d); - return error; - } - // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other - // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 - // To future reader: we'd love if someone found a better way, or at least could explain this result! - if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { - // - // Important: smallest_power is such that it leads to a zero value. - // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero - // so something x 10^-343 goes to zero, but not so with something x 10^-342. - static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); - // - if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero - WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); - return SUCCESS; - } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. - return INVALID_NUMBER(src); + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } } } - double d; - if (!compute_float_64(exponent, i, negative, d)) { - // we are almost never going to get here. - if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } - } - WRITE_DOUBLE(d, src, writer); - return SUCCESS; + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; } -// for performance analysis, it is sometimes useful to skip parsing -#ifdef SIMDJSON_SKIPNUMBERPARSING -template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { - writer.append_s64(0); // always write zero - return SUCCESS; // always succeeds +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_ptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } } -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } -#else +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} -// parse the number at src -// define JSON_TEST_NUMBERS for unit testing -// -// It is assumed that the number is followed by a structural ({,},],[) character -// or a white space character. If that is not the case (e.g., when the JSON -// document is made of a single number), then it is necessary to copy the -// content and append a space before calling this function. -// -// Our objective is accurate parsing (ULP of 0) at high speed. -template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +} // namespace stringparsing +} // unnamed namespace +} // namespace haswell +} // namespace simdjson - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for haswell */ +/* including generic/stage2/structural_iterator.h for haswell: #include */ +/* begin file generic/stage2/structural_iterator.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } +namespace simdjson { +namespace haswell { +namespace { +namespace stage2 { - // - // Handle floats if there is a . or e (or both) - // - int64_t exponent = 0; - bool is_float = false; - if ('.' == *p) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); - digit_count = int(p - start_digits); // used later to guard against overflows +class structural_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + + // Start a structural + simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { } - if (('e' == *p) || ('E' == *p)) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + // Get the buffer position of the current structural character + simdjson_inline const uint8_t* current() { + return &buf[*(next_structural-1)]; } - if (is_float) { - const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); - SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); - if (dirty_end) { return INVALID_NUMBER(src); } - return SUCCESS; + // Get the current structural character + simdjson_inline char current_char() { + return buf[*(next_structural-1)]; } - - // The longest negative 64-bit number is 19 digits. - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - size_t longest_digit_count = negative ? 19 : 20; - if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } - WRITE_INTEGER(~i+1, src, writer); - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + // Get the next structural character without advancing + simdjson_inline char peek_next_char() { + return buf[*next_structural]; + } + simdjson_inline const uint8_t* peek() { + return &buf[*next_structural]; + } + simdjson_inline const uint8_t* advance() { + return &buf[*(next_structural++)]; + } + simdjson_inline char advance_char() { + return buf[*(next_structural++)]; + } + simdjson_inline size_t remaining_len() { + return dom_parser.len - *(next_structural-1); } - // Write unsigned if it doesn't fit in a signed integer. - if (i > uint64_t(INT64_MAX)) { - WRITE_UNSIGNED(i, src, writer); - } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + simdjson_inline bool at_end() { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; } - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; -} + simdjson_inline bool at_beginning() { + return next_structural == dom_parser.structural_indexes.get(); + } +}; -// Inlineable functions -namespace { +} // namespace stage2 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson -// This table can be used to characterize the final character of an integer -// string. For JSON structural character and allowable white space characters, -// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise -// we return NUMBER_ERROR. -// Optimization note: we could easily reduce the size of the table by half (to 128) -// at the cost of an extra branch. -// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): -static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +/* end file generic/stage2/structural_iterator.h for haswell */ +/* including generic/stage2/tape_builder.h for haswell: #include */ +/* begin file generic/stage2/tape_builder.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H -const uint8_t integer_string_finisher[256] = { - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR}; +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } +namespace simdjson { +namespace haswell { +namespace { +namespace stage2 { - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; - return i; -} + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; -// Parse any number from 0 to 18,446,744,073,709,551,615 -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. + * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. + */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - return i; -} + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { - const uint8_t *p = src + 1; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + /** Called each time a new field or element in an array or object is found. */ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - // Note: we use src[1] and not src[0] because src[0] is the quote character in this - // instance. - if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } + simdjson_inline tape_builder(dom::document &doc) noexcept; - return i; + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); } -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); } -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { - // - // Check for minus sign - // - if(src == src_end) { return NUMBER_ERROR; } - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; } -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += uint8_t(negative) + 1; +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { // - // Parse the integer part. + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = src; - uint64_t i = 0; - while (parse_digit(*src, i)) { src++; } + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(src - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*src)) { - // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(*src != '"') { return NUMBER_ERROR; } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; } -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += uint8_t(negative); +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = p-start_digits > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} - exponent += exp_neg ? 0-exp : exp; - } +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; +// private: - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), &d)) { - return NUMBER_ERROR; - } - return d; +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); } -simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { - return (*src == '-'); +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; } -simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += uint8_t(negative); - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } - return false; +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. } -simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += uint8_t(negative); - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { - // We have an integer. - // If the number is negative and valid, it must be a signed integer. - if(negative) { return number_type::signed_integer; } - // We want values larger or equal to 9223372036854775808 to be unsigned - // integers, and the other values to be signed integers. - int digit_count = int(p - src); - if(digit_count >= 19) { - const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); - if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { - return number_type::unsigned_integer; - } - } - return number_type::signed_integer; - } - // Hopefully, we have 'e' or 'E' or '.'. - return number_type::floating_point_number; +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; } -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { - if(src == src_end) { return NUMBER_ERROR; } - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += uint8_t(negative); - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - if(p == src_end) { return NUMBER_ERROR; } - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while ((p != src_end) && parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely((p != src_end) && (*p == '.'))) { - p++; - const uint8_t *start_decimal_digits = p; - if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} - // - // Parse the exponent - // - if ((p != src_end) && (*p == 'e' || *p == 'E')) { - p++; - if(p == src_end) { return NUMBER_ERROR; } - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while ((p != src_end) && parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } +} // namespace stage2 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson - exponent += exp_neg ? 0-exp : exp; - } +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +/* end file generic/stage2/tape_builder.h for haswell */ +/* end file generic/stage2/amalgamated.h for haswell */ - if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } +// +// Stage 1 +// - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; +namespace simdjson { +namespace haswell { - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { - return NUMBER_ERROR; - } - return d; +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; } -simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += uint8_t(negative) + 1; +namespace { - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } +using namespace simd; + +// This identifies structural characters (comma, colon, braces, brackets), +// and ASCII white-space ('\r','\n','\t',' '). +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why + // we can't use the generic lookup_16. + const auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); + // The 6 operators (:,[]{}) have these values: // - // Parse the decimal part. + // , 2C + // : 3A + // [ 5B + // { 7B + // ] 5D + // } 7D // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = p-start_digits > 19; - } - } else { - overflow = p-src > 19; - } - + // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. + // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then + // match it (against | 0x20). // - // Parse the exponent + // To prevent recognizing other characters, everything else gets compared with 0, which cannot + // match due to the | 0x20. // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 0-exp : exp; - } + // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , + // and :. This gets caught in stage 2, which checks the actual character to ensure the right + // operators are in the right places. + const auto op_table = simd8::repeat_16( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B + ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D + ); - if (*p != '"') { return NUMBER_ERROR; } + // We compute whitespace and op separately. If later code only uses one or the + // other, given the fact that all functions are aggressively inlined, we can + // hope that useless computations will be omitted. This is namely case when + // minifying (we only need whitespace). - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + const uint64_t whitespace = in.eq({ + _mm256_shuffle_epi8(whitespace_table, in.chunks[0]), + _mm256_shuffle_epi8(whitespace_table, in.chunks[1]) + }); + // Turn [ and ] into { and } + const simd8x64 curlified{ + in.chunks[0] | 0x20, + in.chunks[1] | 0x20 + }; + const uint64_t op = curlified.eq({ + _mm256_shuffle_epi8(op_table, in.chunks[0]), + _mm256_shuffle_epi8(op_table, in.chunks[1]) + }); - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), &d)) { - return NUMBER_ERROR; - } - return d; + return { whitespace, op }; } -} // unnamed namespace -#endif // SIMDJSON_SKIPNUMBERPARSING +simdjson_inline bool is_ascii(const simd8x64& input) { + return input.reduce_or().is_ascii(); +} -} // namespace numberparsing +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. + return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} -inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { - switch (type) { - case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; - case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; - case number_type::floating_point_number: out << "floating-point number (binary64)"; break; - default: SIMDJSON_UNREACHABLE(); - } - return out; +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80 + return is_third_byte | is_fourth_byte; } +} // unnamed namespace } // namespace haswell } // namespace simdjson -#endif // SIMDJSON_GENERIC_NUMBERPARSING_H -/* end file simdjson/generic/numberparsing.h for haswell */ - -/* including simdjson/generic/implementation_simdjson_result_base-inl.h for haswell: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for haswell */ -#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace haswell { - // -// internal::implementation_simdjson_result_base inline implementation +// Stage 2 // -template -simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { - error = this->second; - if (!error) { - value = std::forward>(*this).first; - } -} - -template -simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { - error_code error; - std::forward>(*this).tie(value, error); - return error; -} +// +// Implementation-specific overrides +// +namespace simdjson { +namespace haswell { -template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { - return this->second; +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return haswell::stage1::json_minifier::minify<128>(buf, len, dst, dst_len); } -#if SIMDJSON_EXCEPTIONS - -template -simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return this->first; +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return haswell::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming); } -template -simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { - return std::forward>(*this).take_value(); +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return haswell::stage1::generic_validate_utf8(buf,len); } -template -simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(this->first); +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); } -template -simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { - return std::forward>(*this).take_value(); +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); } -#endif // SIMDJSON_EXCEPTIONS - -template -simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { - return this->first; +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { + return haswell::stringparsing::parse_string(src, dst, replacement_char); } -template -simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { - return this->first; +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return haswell::stringparsing::parse_wobbly_string(src, dst); } -template -simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { - return std::forward(this->first); +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); } -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept - : first{std::forward(value)}, second{error} {} -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept - : implementation_simdjson_result_base(T{}, error) {} -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept - : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} - } // namespace haswell } // namespace simdjson -#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H -/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for haswell */ -/* end file simdjson/generic/amalgamated.h for haswell */ -/* including simdjson/haswell/end.h: #include "simdjson/haswell/end.h" */ +/* including simdjson/haswell/end.h: #include */ /* begin file simdjson/haswell/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ @@ -20166,90 +20473,58 @@ SIMDJSON_UNTARGET_REGION #undef SIMDJSON_IMPLEMENTATION /* end file simdjson/haswell/end.h */ -#endif // SIMDJSON_HASWELL_H -/* end file simdjson/haswell.h */ -/* including simdjson/haswell/implementation.h: #include */ -/* begin file simdjson/haswell/implementation.h */ -#ifndef SIMDJSON_HASWELL_IMPLEMENTATION_H -#define SIMDJSON_HASWELL_IMPLEMENTATION_H +#endif // SIMDJSON_SRC_HASWELL_CPP +/* end file haswell.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_ICELAKE +/* including icelake.cpp: #include */ +/* begin file icelake.cpp */ +#ifndef SIMDJSON_SRC_ICELAKE_CPP +#define SIMDJSON_SRC_ICELAKE_CPP /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL -namespace simdjson { -namespace haswell { - -/** - * @private - */ -class implementation final : public simdjson::implementation { -public: - simdjson_inline implementation() : simdjson::implementation( - "haswell", - "Intel/AMD AVX2", - internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 - ) {} - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t capacity, - size_t max_length, - std::unique_ptr& dst - ) const noexcept final; - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; - simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; -}; - -} // namespace haswell -} // namespace simdjson - -#endif // SIMDJSON_HASWELL_IMPLEMENTATION_H -/* end file simdjson/haswell/implementation.h */ - -/* including simdjson/haswell/begin.h: #include */ -/* begin file simdjson/haswell/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "haswell" */ -#define SIMDJSON_IMPLEMENTATION haswell +/* including simdjson/icelake.h: #include */ +/* begin file simdjson/icelake.h */ +#ifndef SIMDJSON_ICELAKE_H +#define SIMDJSON_ICELAKE_H -/* including simdjson/haswell/base.h: #include "simdjson/haswell/base.h" */ -/* begin file simdjson/haswell/base.h */ -#ifndef SIMDJSON_HASWELL_BASE_H -#define SIMDJSON_HASWELL_BASE_H +/* including simdjson/icelake/begin.h: #include "simdjson/icelake/begin.h" */ +/* begin file simdjson/icelake/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "icelake" */ +#define SIMDJSON_IMPLEMENTATION icelake +/* including simdjson/icelake/base.h: #include "simdjson/icelake/base.h" */ +/* begin file simdjson/icelake/base.h */ +#ifndef SIMDJSON_ICELAKE_BASE_H +#define SIMDJSON_ICELAKE_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE namespace simdjson { /** - * Implementation for Haswell (Intel AVX2). + * Implementation for Icelake (Intel AVX512). */ -namespace haswell { +namespace icelake { class implementation; -namespace { -namespace simd { -template struct simd8; -template struct simd8x64; -} // namespace simd -} // unnamed namespace - -} // namespace haswell +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_HASWELL_BASE_H -/* end file simdjson/haswell/base.h */ -/* including simdjson/haswell/intrinsics.h: #include "simdjson/haswell/intrinsics.h" */ -/* begin file simdjson/haswell/intrinsics.h */ -#ifndef SIMDJSON_HASWELL_INTRINSICS_H -#define SIMDJSON_HASWELL_INTRINSICS_H +#endif // SIMDJSON_ICELAKE_BASE_H +/* end file simdjson/icelake/base.h */ +/* including simdjson/icelake/intrinsics.h: #include "simdjson/icelake/intrinsics.h" */ +/* begin file simdjson/icelake/intrinsics.h */ +#ifndef SIMDJSON_ICELAKE_INTRINSICS_H +#define SIMDJSON_ICELAKE_INTRINSICS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #if SIMDJSON_VISUAL_STUDIO @@ -20286,6 +20561,14 @@ template struct simd8x64; #include #include #include // for _mm_clmulepi64_si128 +// Important: we need the AVX-512 headers: +#include +#include +#include +#include +#include +#include +#include // unfortunately, we may not get _blsr_u64, but, thankfully, clang // has it as a macro. #ifndef _blsr_u64 @@ -20294,28 +20577,27 @@ template struct simd8x64; #endif // _blsr_u64 #endif // SIMDJSON_CLANG_VISUAL_STUDIO -static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); +static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); -#endif // SIMDJSON_HASWELL_INTRINSICS_H -/* end file simdjson/haswell/intrinsics.h */ +#endif // SIMDJSON_ICELAKE_INTRINSICS_H +/* end file simdjson/icelake/intrinsics.h */ -#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL -SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt,popcnt") #endif -/* including simdjson/haswell/bitmanipulation.h: #include "simdjson/haswell/bitmanipulation.h" */ -/* begin file simdjson/haswell/bitmanipulation.h */ -#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H -#define SIMDJSON_HASWELL_BITMANIPULATION_H +/* including simdjson/icelake/bitmanipulation.h: #include "simdjson/icelake/bitmanipulation.h" */ +/* begin file simdjson/icelake/bitmanipulation.h */ +#ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H +#define SIMDJSON_ICELAKE_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmask.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace icelake { namespace { // We sometimes call trailing_zero on inputs that are zero, @@ -20351,7 +20633,7 @@ simdjson_inline int leading_zeroes(uint64_t input_num) { #if SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows in this kernel + // note: we do not support legacy 32-bit Windows return __popcnt64(input_num);// Visual Studio wants two underscores } #else @@ -20372,23 +20654,23 @@ simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, } } // unnamed namespace -} // namespace haswell +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_HASWELL_BITMANIPULATION_H -/* end file simdjson/haswell/bitmanipulation.h */ -/* including simdjson/haswell/bitmask.h: #include "simdjson/haswell/bitmask.h" */ -/* begin file simdjson/haswell/bitmask.h */ -#ifndef SIMDJSON_HASWELL_BITMASK_H -#define SIMDJSON_HASWELL_BITMASK_H +#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H +/* end file simdjson/icelake/bitmanipulation.h */ +/* including simdjson/icelake/bitmask.h: #include "simdjson/icelake/bitmask.h" */ +/* begin file simdjson/icelake/bitmask.h */ +#ifndef SIMDJSON_ICELAKE_BITMASK_H +#define SIMDJSON_ICELAKE_BITMASK_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace icelake { namespace { // @@ -20405,112 +20687,72 @@ simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { } } // unnamed namespace -} // namespace haswell +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_HASWELL_BITMASK_H -/* end file simdjson/haswell/bitmask.h */ -/* including simdjson/haswell/numberparsing_defs.h: #include "simdjson/haswell/numberparsing_defs.h" */ -/* begin file simdjson/haswell/numberparsing_defs.h */ -#ifndef SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H -#define SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#endif // SIMDJSON_ICELAKE_BITMASK_H +/* end file simdjson/icelake/bitmask.h */ +/* including simdjson/icelake/simd.h: #include "simdjson/icelake/simd.h" */ +/* begin file simdjson/icelake/simd.h */ +#ifndef SIMDJSON_ICELAKE_SIMD_H +#define SIMDJSON_ICELAKE_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -namespace simdjson { -namespace haswell { -namespace numberparsing { - -/** @private */ -static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - // this actually computes *16* values so we are being wasteful. - const __m128i ascii0 = _mm_set1_epi8('0'); - const __m128i mul_1_10 = - _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); - const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); - const __m128i mul_1_10000 = - _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); - const __m128i input = _mm_sub_epi8( - _mm_loadu_si128(reinterpret_cast(chars)), ascii0); - const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); - const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); - const __m128i t3 = _mm_packus_epi32(t2, t2); - const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); - return _mm_cvtsi128_si32( - t4); // only captures the sum of the first 8 digits, drop the rest -} +#if defined(__GNUC__) && !defined(__clang__) +#if __GNUC__ == 8 +#define SIMDJSON_GCC8 1 +#endif // __GNUC__ == 8 +#endif // defined(__GNUC__) && !defined(__clang__) -/** @private */ -simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { - internal::value128 answer; -#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); -#endif - return answer; +#if SIMDJSON_GCC8 +/** + * GCC 8 fails to provide _mm512_set_epi8. We roll our own. + */ +inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { + return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), + uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), + uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), + uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), + uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), + uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), + uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), + uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); } +#endif // SIMDJSON_GCC8 -} // namespace numberparsing -} // namespace haswell -} // namespace simdjson - -#define SIMDJSON_SWAR_NUMBER_PARSING 1 - -#endif // SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H -/* end file simdjson/haswell/numberparsing_defs.h */ -/* including simdjson/haswell/simd.h: #include "simdjson/haswell/simd.h" */ -/* begin file simdjson/haswell/simd.h */ -#ifndef SIMDJSON_HASWELL_SIMD_H -#define SIMDJSON_HASWELL_SIMD_H -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace icelake { namespace { namespace simd { // Forward-declared so they can be used by splat and friends. template struct base { - __m256i value; + __m512i value; // Zero constructor - simdjson_inline base() : value{__m256i()} {} + simdjson_inline base() : value{__m512i()} {} // Conversion from SIMD register - simdjson_inline base(const __m256i _value) : value(_value) {} + simdjson_inline base(const __m512i _value) : value(_value) {} // Conversion to SIMD register - simdjson_inline operator const __m256i&() const { return this->value; } - simdjson_inline operator __m256i&() { return this->value; } + simdjson_inline operator const __m512i&() const { return this->value; } + simdjson_inline operator __m512i&() { return this->value; } // Bit operations - simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } - simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } - simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } - simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } + simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } @@ -20526,39 +20768,41 @@ namespace simd { typedef uint64_t bitmask2_t; simdjson_inline base8() : base>() {} - simdjson_inline base8(const __m256i _value) : base>(_value) {} + simdjson_inline base8(const __m512i _value) : base>(_value) {} - friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } + friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { + return _mm512_cmpeq_epi8_mask(lhs, rhs); + } static const int SIZE = sizeof(base::value); template simdjson_inline simd8 prev(const simd8 prev_chunk) const { - return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); + // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) + constexpr int shift = 16 - N; + return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); } }; // SIMD byte mask type (returned by things like eq and gt) template<> struct simd8: base8 { - static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } + static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m256i _value) : base8(_value) {} + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m512i _value) : base8(_value) {} // Splat constructor - simdjson_inline simd8(bool _value) : base8(splat(_value)) {} - - simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } - simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } simdjson_inline simd8 operator~() const { return *this ^ true; } }; template struct base8_numeric: base8 { - static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } - static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } - static simdjson_inline simd8 load(const T values[32]) { - return _mm256_loadu_si256(reinterpret_cast(values)); + static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } + static simdjson_inline simd8 load(const T values[64]) { + return _mm512_loadu_si512(reinterpret_cast(values)); } // Repeat 16 values as many times as necessary (usually for lookup tables) static simdjson_inline simd8 repeat_16( @@ -20566,6 +20810,10 @@ namespace simd { T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 ) { return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, @@ -20574,14 +20822,14 @@ namespace simd { } simdjson_inline base8_numeric() : base8() {} - simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} + simdjson_inline base8_numeric(const __m512i _value) : base8(_value) {} // Store to array - simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } + simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } + simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } @@ -20591,60 +20839,23 @@ namespace simd { // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm256_shuffle_epi8(lookup_table, *this); + return _mm512_shuffle_epi8(lookup_table, *this); } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes // get written. // Design consideration: it seems like a function with the // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. template - simdjson_inline void compress(uint32_t mask, L * output) const { - using internal::thintable_epi8; - using internal::BitsSetTable256mul2; - using internal::pshufb_combine_table; - // this particular implementation was inspired by work done by @animetosho - // we do it in four steps, first 8 bytes and then second 8 bytes... - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits - uint8_t mask3 = uint8_t(mask >> 16); // ... - uint8_t mask4 = uint8_t(mask >> 24); // ... - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], - thintable_epi8[mask2], thintable_epi8[mask1]); - // we increment by 0x08 the second half of the mask and so forth - shufmask = - _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, - 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); - // this is the version "nearly pruned" - __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); - // we still need to put the pieces back together. - // we compute the popcount of the first words: - int pop1 = BitsSetTable256mul2[mask1]; - int pop3 = BitsSetTable256mul2[mask3]; - - // then load the corresponding mask - // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. - __m256i v256 = _mm256_castsi128_si256( - _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); - __m256i compactmask = _mm256_insertf128_si256(v256, - _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); - __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); - // We just need to write out the result. - // This is the tricky bit that is hard to do - // if we want to return a SIMD register, since there - // is no single-instruction approach to recombine - // the two 128-bit lanes with an offset. - __m128i v128; - v128 = _mm256_castsi256_si128(almostthere); - _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); - v128 = _mm256_extractf128_si256(almostthere, 1); - _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); + simdjson_inline void compress(uint64_t mask, L * output) const { + // we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability + // (AMD Zen4 has terrible performance with it, it is effectively broken) + // _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + __m512i compressed = _mm512_maskz_compress_epi8(~mask, *this); + _mm512_storeu_si512(output, compressed); // could use a mask } template @@ -20666,29 +20877,42 @@ namespace simd { template<> struct simd8 : base8_numeric { simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, - int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 - ) : simd8(_mm256_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v16,v17,v18,v19,v20,v21,v22,v23, - v24,v25,v26,v27,v28,v29,v30,v31 + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, + int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, + int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, + int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, + int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, @@ -20697,39 +20921,53 @@ namespace simd { } // Order-sensitive comparisons - simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } - simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } - simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } + + simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } }; // Unsigned bytes template<> struct simd8: base8_numeric { simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, - uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 - ) : simd8(_mm256_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v16,v17,v18,v19,v20,v21,v22,v23, - v24,v25,v26,v27,v28,v29,v30,v31 + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, + uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, + uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, + uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, + uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, @@ -20738,45 +20976,48 @@ namespace simd { } // Saturated math - simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } - simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } // Order-specific operations - simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } // Same as >, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } // Bit-specific operations - simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } - simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } + + simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { + return !_mm512_test_epi8_mask(*this, *this); + } simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } template - simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } template - simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + simdjson_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } // Get one of the bits and make a bitmask out of it. // e.g. value.get_bit<7>() gets the high bit template - simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } + simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } }; template struct simd8x64 { static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); + static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); const simd8 chunks[NUM_CHUNKS]; simd8x64(const simd8x64& o) = delete; // no copy allowed @@ -20784,84 +21025,65 @@ namespace simd { simd8x64() = delete; // no default constructor allowed simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} - simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} simdjson_inline uint64_t compress(uint64_t mask, T * output) const { - uint32_t mask1 = uint32_t(mask); - uint32_t mask2 = uint32_t(mask >> 32); - this->chunks[0].compress(mask1, output); - this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); + this->chunks[0].compress(mask, output); return 64 - count_ones(mask); } simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr+sizeof(simd8)*0); - this->chunks[1].store(ptr+sizeof(simd8)*1); - } - - simdjson_inline uint64_t to_bitmask() const { - uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r_hi = this->chunks[1].to_bitmask(); - return r_lo | (r_hi << 32); } simdjson_inline simd8 reduce_or() const { - return this->chunks[0] | this->chunks[1]; + return this->chunks[0]; } simdjson_inline simd8x64 bit_or(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( - this->chunks[0] | mask, - this->chunks[1] | mask + this->chunks[0] | mask ); } simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] == mask, - this->chunks[1] == mask - ).to_bitmask(); + return this->chunks[0] == mask; } simdjson_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64( - this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1] - ).to_bitmask(); + return this->chunks[0] == other.chunks[0]; } simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] <= mask, - this->chunks[1] <= mask - ).to_bitmask(); + return this->chunks[0] <= mask; } }; // struct simd8x64 } // namespace simd } // unnamed namespace -} // namespace haswell +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_HASWELL_SIMD_H -/* end file simdjson/haswell/simd.h */ -/* including simdjson/haswell/stringparsing_defs.h: #include "simdjson/haswell/stringparsing_defs.h" */ -/* begin file simdjson/haswell/stringparsing_defs.h */ -#ifndef SIMDJSON_HASWELL_STRINGPARSING_DEFS_H -#define SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +#endif // SIMDJSON_ICELAKE_SIMD_H +/* end file simdjson/icelake/simd.h */ +/* including simdjson/icelake/stringparsing_defs.h: #include "simdjson/icelake/stringparsing_defs.h" */ +/* begin file simdjson/icelake/stringparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +#define SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/simd.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace icelake { namespace { using namespace simd; @@ -20869,7 +21091,7 @@ using namespace simd; // Holds backslashes and quotes locations. struct backslash_and_quote { public: - static constexpr uint32_t BYTES_PROCESSED = 32; + static constexpr uint32_t BYTES_PROCESSED = 64; simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } @@ -20877,8 +21099,8 @@ struct backslash_and_quote { simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } - uint32_t bs_bits; - uint32_t quote_bits; + uint64_t bs_bits; + uint64_t quote_bits; }; // struct backslash_and_quote simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { @@ -20889,16291 +21111,27990 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin // store to dest unconditionally - we can overwrite the bits we don't like later v.store(dst); return { - static_cast((v == '\\').to_bitmask()), // bs_bits - static_cast((v == '"').to_bitmask()), // quote_bits + static_cast(v == '\\'), // bs_bits + static_cast(v == '"'), // quote_bits }; } } // unnamed namespace -} // namespace haswell +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_HASWELL_STRINGPARSING_DEFS_H -/* end file simdjson/haswell/stringparsing_defs.h */ -/* end file simdjson/haswell/begin.h */ -/* including generic/amalgamated.h for haswell: #include */ -/* begin file generic/amalgamated.h for haswell */ -#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) -#error generic/dependencies.h must be included before generic/amalgamated.h! -#endif - -/* including generic/base.h for haswell: #include */ -/* begin file generic/base.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_BASE_H +#endif // SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +/* end file simdjson/icelake/stringparsing_defs.h */ +/* including simdjson/icelake/numberparsing_defs.h: #include "simdjson/icelake/numberparsing_defs.h" */ +/* begin file simdjson/icelake/numberparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +#define SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { -namespace { - -struct json_character_block; - -} // unnamed namespace -} // namespace haswell -} // namespace simdjson +namespace icelake { +namespace numberparsing { -#endif // SIMDJSON_SRC_GENERIC_BASE_H -/* end file generic/base.h for haswell */ -/* including generic/dom_parser_implementation.h for haswell: #include */ -/* begin file generic/dom_parser_implementation.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} -// Interface a dom parser implementation must fulfill -namespace simdjson { -namespace haswell { -namespace { +} // namespace numberparsing +} // namespace icelake +} // namespace simdjson -simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); -simdjson_inline bool is_ascii(const simd8x64& input); +#define SIMDJSON_SWAR_NUMBER_PARSING 1 -} // unnamed namespace -} // namespace haswell -} // namespace simdjson +#endif // SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +/* end file simdjson/icelake/numberparsing_defs.h */ +/* end file simdjson/icelake/begin.h */ +/* including simdjson/generic/amalgamated.h for icelake: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for icelake */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif -#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H -/* end file generic/dom_parser_implementation.h for haswell */ -/* including generic/json_character_block.h for haswell: #include */ -/* begin file generic/json_character_block.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +/* including simdjson/generic/base.h for icelake: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for icelake */ +#ifndef SIMDJSON_GENERIC_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { -namespace { - -struct json_character_block { - static simdjson_inline json_character_block classify(const simd::simd8x64& in); +namespace icelake { - simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } - simdjson_inline uint64_t op() const noexcept { return _op; } - simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } +struct open_container; +class dom_parser_implementation; - uint64_t _whitespace; - uint64_t _op; +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word }; -} // unnamed namespace -} // namespace haswell +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H -/* end file generic/json_character_block.h for haswell */ -/* end file generic/amalgamated.h for haswell */ -/* including generic/stage1/amalgamated.h for haswell: #include */ -/* begin file generic/stage1/amalgamated.h for haswell */ -// Stuff other things depend on -/* including generic/stage1/base.h for haswell: #include */ -/* begin file generic/stage1/base.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for icelake */ +/* including simdjson/generic/jsoncharutils.h for icelake: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for icelake */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace icelake { namespace { -namespace stage1 { +namespace jsoncharutils { -class bit_indexer; -template -struct buf_block_reader; -struct json_block; -class json_minifier; -class json_scanner; -struct json_string_block; -class json_string_scanner; -class json_structural_indexer; +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} -} // namespace stage1 +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} -namespace utf8_validation { -struct utf8_checker; -} // namespace utf8_validation +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} -using utf8_validation::utf8_checker; +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif +} // namespace jsoncharutils } // unnamed namespace -} // namespace haswell +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H -/* end file generic/stage1/base.h for haswell */ -/* including generic/stage1/buf_block_reader.h for haswell: #include */ -/* begin file generic/stage1/buf_block_reader.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for icelake */ +/* including simdjson/generic/atomparsing.h for icelake: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for icelake */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include namespace simdjson { -namespace haswell { +namespace icelake { namespace { -namespace stage1 { +/// @private +namespace atomparsing { -// Walks through a buffer in block-sized increments, loading the last part with spaces -template -struct buf_block_reader { -public: - simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdjson_inline size_t block_index(); - simdjson_inline bool has_full_block() const; - simdjson_inline const uint8_t *full_block() const; - /** - * Get the last block, padded with spaces. - * - * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this - * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there - * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. - * - * @return the number of effective characters in the last block. - */ - simdjson_inline size_t get_remainder(uint8_t *dst) const; - simdjson_inline void advance(); -private: - const uint8_t *buf; - const size_t len; - const size_t lenminusstep; - size_t idx; -}; +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text_64(const uint8_t *text) { - static char buf[sizeof(simd8x64) + 1]; - for (size_t i=0; i); i++) { - buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text(const simd8x64& in) { - static char buf[sizeof(simd8x64) + 1]; - in.store(reinterpret_cast(buf)); - for (size_t i=0; i); i++) { - if (buf[i] < ' ') { buf[i] = '_'; } - } - buf[sizeof(simd8x64)] = '\0'; - return buf; +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); } -simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { - static char buf[sizeof(simd8x64) + 1]; - in.store(reinterpret_cast(buf)); - for (size_t i=0; i); i++) { - if (buf[i] <= ' ') { buf[i] = '_'; } - if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } - } - buf[sizeof(simd8x64)] = '\0'; - return buf; +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } -simdjson_unused static char * format_mask(uint64_t mask) { - static char buf[sizeof(simd8x64) + 1]; - for (size_t i=0; i<64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; - } - buf[64] = '\0'; - return buf; +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } } -template -simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} - -template -simdjson_inline size_t buf_block_reader::block_index() { return idx; } - -template -simdjson_inline bool buf_block_reader::has_full_block() const { - return idx < lenminusstep; +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } -template -simdjson_inline const uint8_t *buf_block_reader::full_block() const { - return &buf[idx]; +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } } -template -simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { - if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. - std::memcpy(dst, buf + idx, len - idx); - return len - idx; +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } -template -simdjson_inline void buf_block_reader::advance() { - idx += STEP_SIZE; +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } } -} // namespace stage1 +} // namespace atomparsing } // unnamed namespace -} // namespace haswell +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H -/* end file generic/stage1/buf_block_reader.h for haswell */ -/* including generic/stage1/json_escape_scanner.h for haswell: #include */ -/* begin file generic/stage1/json_escape_scanner.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for icelake */ +/* including simdjson/generic/dom_parser_implementation.h for icelake: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for icelake */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { -namespace { -namespace stage1 { - -/** - * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). - */ -struct json_escape_scanner { - /** The actual escape characters (the backslashes themselves). */ - uint64_t next_is_escaped = 0ULL; +namespace icelake { - struct escaped_and_escape { - /** - * Mask of escaped characters. - * - * ``` - * \n \\n \\\n \\\\n \ - * 0100100010100101000 - * n \ \ n \ \ - * ``` - */ - uint64_t escaped; - /** - * Mask of escape characters. - * - * ``` - * \n \\n \\\n \\\\n \ - * 1001000101001010001 - * \ \ \ \ \ \ \ - * ``` - */ - uint64_t escape; - }; +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container - /** - * Get a mask of both escape and escaped characters (the characters following a backslash). - * - * @param potential_escape A mask of the character that can escape others (but could be - * escaped itself). e.g. block.eq('\\') - */ - simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); -#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT - if (!backslash) { return {next_escaped_without_backslashes(), 0}; } -#endif +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; - // | | Mask (shows characters instead of 1's) | Depth | Instructions | - // |--------------------------------|----------------------------------------|-------|---------------------| - // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | - // | | ` even odd even odd odd` | | | - // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) - // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) - // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) - // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) - // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () - // (*) this is not needed until the next iteration - uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); - uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); - uint64_t escape = escape_and_terminal_code & backslash; - this->next_is_escaped = escape >> 63; - return {escaped, escape}; - } + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; private: - static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); - simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { - uint64_t escaped = this->next_is_escaped; - this->next_is_escaped = 0; - return escaped; - } - - /** - * Returns a mask of the next escape characters (masking out escaped backslashes), along with - * any non-backslash escape codes. - * - * \n \\n \\\n \\\\n returns: - * \n \ \ \n \ \ - * 11 100 1011 10100 - * - * You are expected to mask out the first bit yourself if the previous block had a trailing - * escape. - * - * & the result with potential_escape to get just the escape characters. - * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. - */ - static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { - // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: - // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be - // inverted (\\\ would be 010 instead of 101). - // - // ``` - // string: | ____\\\\_\\\\_____ | - // maybe_escaped | ODD | \ \ \ \ | - // even-aligned ^^^ ^^^^ odd-aligned - // ``` - // - // Taking that into account, our basic strategy is: - // - // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for - // odd-aligned runs. - // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the - // odd bits in odd-aligned runs. - // 3. & with backslash to clean up any stray bits. - // runs are set to 0, and then XORing with "odd": - // - // | | Mask (shows characters instead of 1's) | Instructions | - // |--------------------------------|----------------------------------------|---------------------| - // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | - // | | ` even odd even odd odd` | - // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) - // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) - // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) - // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) - // - - // Escaped characters are characters following an escape. - uint64_t maybe_escaped = potential_escape << 1; - - // To distinguish odd from even escape sequences, therefore, we turn on any *starting* - // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) - // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. - // - Odd runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. - // - All other odd bytes are 1, and even bytes are 0. - uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; - uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; - - // Now we flip all odd bytes back with xor. This: - // - Makes odd runs of backslashes go from 0000 to 1010 - // - Makes even runs of backslashes go from 1111 to 1010 - // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) - // - Resets all other bytes to 0 - return even_series_codes_and_odd_bits ^ ODD_BITS; - } }; -} // namespace stage1 -} // unnamed namespace -} // namespace haswell +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H -/* end file generic/stage1/json_escape_scanner.h for haswell */ -/* including generic/stage1/json_string_scanner.h for haswell: #include */ -/* begin file generic/stage1/json_string_scanner.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - namespace simdjson { -namespace haswell { -namespace { -namespace stage1 { - -struct json_string_block { - // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : - _escaped(escaped), _quote(quote), _in_string(in_string) {} - - // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } - // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } - // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } - // Tail of string (everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } - - // escaped characters (backslashed--does not include the hex characters after \u) - uint64_t _escaped; - // real quotes (non-escaped ones) - uint64_t _quote; - // string characters (includes start quote but not end quote) - uint64_t _in_string; -}; - -// Scans blocks for string characters, storing the state necessary to do so -class json_string_scanner { -public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); - // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); - -private: - // Scans for escape characters - json_escape_scanner escape_scanner{}; - // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). - uint64_t prev_in_string = 0ULL; -}; - -// -// Return a mask of all string characters plus end quotes. -// -// prev_escaped is overflow saying whether the next character is escaped. -// prev_in_string is overflow saying whether we're still in a string. -// -// Backslash sequences outside of quotes will be detected in stage 2. -// -simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { - const uint64_t backslash = in.eq('\\'); - const uint64_t escaped = escape_scanner.next(backslash).escaped; - const uint64_t quote = in.eq('"') & ~escaped; - - // - // prefix_xor flips on bits inside the string (and flips off the end quote). - // - // Then we xor with prev_in_string: if we were in a string already, its effect is flipped - // (characters inside strings are outside, and characters outside strings are inside). - // - const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; +namespace icelake { - // - // Check if we're still in a string at the end of the box so the next block will know - // - prev_in_string = uint64_t(static_cast(in_string) >> 63); +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; - // Use ^ to turn the beginning quote off, and the end quote on. +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; - // We are returning a function-local object so either we get a move constructor - // or we get copy elision. - return json_string_block(escaped, quote, in_string); + _capacity = capacity; + return SUCCESS; } -simdjson_really_inline error_code json_string_scanner::finish() { - if (prev_in_string) { - return UNCLOSED_STRING; - } +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; return SUCCESS; } -} // namespace stage1 -} // unnamed namespace -} // namespace haswell +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H -/* end file generic/stage1/json_string_scanner.h for haswell */ -/* including generic/stage1/utf8_lookup4_algorithm.h for haswell: #include */ -/* begin file generic/stage1/utf8_lookup4_algorithm.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for icelake */ +/* including simdjson/generic/implementation_simdjson_result_base.h for icelake: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for icelake */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { -namespace { -namespace utf8_validation { +namespace icelake { -using namespace simd; +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { - simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { -// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) -// Bit 1 = Too Long (ASCII followed by continuation) -// Bit 2 = Overlong 3-byte -// Bit 4 = Surrogate -// Bit 5 = Overlong 2-byte -// Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1<<6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 - ); - constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, - CARRY, + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000 - ); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT - ); - return (byte_1_high & byte_1_low & byte_2_high); - } - simdjson_inline simd8 check_multibyte_lengths(const simd8 input, - const simd8 prev_input, const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = must_be_2_3_continuation(prev2, prev3); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; - } + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; - // - // Return nonzero if there are incomplete multibyte characters at the end of the block: - // e.g. if there is a 4-byte character, but it's 3 bytes from the end. - // - simdjson_inline simd8 is_incomplete(const simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): - // ... 1111____ 111_____ 11______ -#if SIMDJSON_IMPLEMENTATION_ICELAKE - static const uint8_t max_array[64] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 - }; -#else - static const uint8_t max_array[32] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 - }; -#endif - const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); - return input.gt_bits(max_value); - } + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; - struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast path) - simd8 prev_incomplete; +#if SIMDJSON_EXCEPTIONS - // - // Check whether the current bytes are valid UTF-8. - // - simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes - // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); - // The only problem that can happen at EOF is that a multibyte character is too short - // or a byte value too large in the last bytes: check_special_cases only checks for bytes - // too large in the first of two bytes. - simdjson_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't - // possibly finish them. - this->error |= this->prev_incomplete; - } + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); - simdjson_inline void check_next_input(const simd8x64& input) { - if(simdjson_likely(is_ascii(input))) { - this->error |= this->prev_incomplete; - } else { - // you might think that a for-loop would work, but under Visual Studio, it is not good enough. - static_assert((simd8x64::NUM_CHUNKS == 1) - ||(simd8x64::NUM_CHUNKS == 2) - || (simd8x64::NUM_CHUNKS == 4), - "We support one, two or four chunks per 64-byte block."); - SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; - } - } - // do not forget to call check_eof! - simdjson_inline error_code errors() { - return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; - } + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); - }; // struct utf8_checker -} // namespace utf8_validation + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); -} // unnamed namespace -} // namespace haswell + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H -/* end file generic/stage1/utf8_lookup4_algorithm.h for haswell */ -/* including generic/stage1/json_scanner.h for haswell: #include */ -/* begin file generic/stage1/json_scanner.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for icelake */ +/* including simdjson/generic/numberparsing.h for icelake: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for icelake */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -namespace simdjson { -namespace haswell { -namespace { -namespace stage1 { - -/** - * A block of scanned json, with information on operators and scalars. - * - * We seek to identify pseudo-structural characters. Anything that is inside - * a string must be omitted (hence & ~_string.string_tail()). - * Otherwise, pseudo-structural characters come in two forms. - * 1. We have the structural characters ([,],{,},:, comma). The - * term 'structural character' is from the JSON RFC. - * 2. We have the 'scalar pseudo-structural characters'. - * Scalars are quotes, and any character except structural characters and white space. - * - * To identify the scalar pseudo-structural characters, we must look at what comes - * before them: it must be a space, a quote or a structural characters. - * Starting with simdjson v0.3, we identify them by - * negation: we identify everything that is followed by a non-quote scalar, - * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. - */ -struct json_block { -public: - // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : - _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : - _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} +#include +#include +#include - /** - * The start of structurals. - * In simdjson prior to v0.3, these were called the pseudo-structural characters. - **/ - simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } - /** All JSON whitespace (i.e. not in a string) */ - simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } +namespace simdjson { +namespace icelake { +namespace numberparsing { - // Helpers +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif - /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } - /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } +namespace { - // string and escape characters - json_string_block _string; - // whitespace, structural characters ('operators'), scalars - json_character_block _characters; - // whether the previous character was a scalar - uint64_t _follows_potential_nonquote_scalar; -private: - // Potential structurals (i.e. disregarding strings) +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} - /** - * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". - * They may reside inside a string. - **/ - simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } - /** - * The start of non-operator runs, like 123, true and "abc". - * It main reside inside a string. - **/ - simdjson_inline uint64_t potential_scalar_start() const noexcept { - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space - // then we know that it is irrelevant structurally. - return _characters.scalar() & ~follows_potential_scalar(); - } - /** - * Whether the given character is immediately after a non-operator like 123, true. - * The characters following a quote are not included. - */ - simdjson_inline uint64_t follows_potential_scalar() const noexcept { - // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character - // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a - // white space. - // It is understood that within quoted region, anything at all could be marked (irrelevant). - return _follows_potential_nonquote_scalar; +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; } -}; - -/** - * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. - * - * The scanner starts by calculating two distinct things: - * - string characters (taking \" into account) - * - structural characters or 'operators' ([]{},:, comma) - * and scalars (runs of non-operators like 123, true and "abc") - * - * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: - * in particular, the operator/scalar bit will find plenty of things that are actually part of - * strings. When we're done, json_block will fuse the two together by masking out tokens that are - * part of a string. - */ -class json_scanner { -public: - json_scanner() = default; - simdjson_inline json_block next(const simd::simd8x64& in); - // Returns either UNCLOSED_STRING or SUCCESS - simdjson_inline error_code finish(); + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html -private: - // Whether the last character of the previous iteration is part of a scalar token - // (anything except whitespace or a structural character/'operator'). - uint64_t prev_scalar = 0ULL; - json_string_scanner string_scanner{}; -}; + // The fast path has now failed, so we are failing back on the slower path. + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } -// -// Check if the current character immediately follows a matching character. -// -// For example, this checks for quotes with backslashes in front of them: -// -// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); -// -simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { - const uint64_t result = match << 1 | overflow; - overflow = match >> 63; - return result; -} -simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { - json_string_block strings = string_scanner.next(in); - // identifies the white-space and the structural characters - json_character_block characters = json_character_block::classify(in); - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. // - // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) - // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential - // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we - // may need to add an extra check when parsing strings. + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. // - // Performance: there are many ways to skin this cat. - const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); - uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); - // We are returning a function-local object so either we get a move constructor - // or we get copy elision. - return json_block( - strings,// strings is a function-local object so either it moves or the copy is elided. - characters, - follows_nonquote_scalar - ); -} - -simdjson_inline error_code json_scanner::finish() { - return string_scanner.finish(); -} + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximately equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; -} // namespace stage1 -} // unnamed namespace -} // namespace haswell -} // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H -/* end file generic/stage1/json_scanner.h for haswell */ + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; -// All other declarations -/* including generic/stage1/find_next_document_index.h for haswell: #include */ -/* begin file generic/stage1/find_next_document_index.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. -namespace simdjson { -namespace haswell { -namespace { -namespace stage1 { + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); -/** - * This algorithm is used to quickly identify the last structural position that - * makes up a complete document. - * - * It does this by going backwards and finding the last *document boundary* (a - * place where one value follows another without a comma between them). If the - * last document (the characters after the boundary) has an equal number of - * start and end brackets, it is considered complete. - * - * Simply put, we iterate over the structural characters, starting from - * the end. We consider that we found the end of a JSON document when the - * first element of the pair is NOT one of these characters: '{' '[' ':' ',' - * and when the second element is NOT one of these characters: '}' ']' ':' ','. - * - * This simple comparison works most of the time, but it does not cover cases - * where the batch's structural indexes contain a perfect amount of documents. - * In such a case, we do not have access to the structural index which follows - * the last document, therefore, we do not have access to the second element in - * the pair, and that means we cannot identify the last document. To fix this - * issue, we keep a count of the open and closed curly/square braces we found - * while searching for the pair. When we find a pair AND the count of open and - * closed curly/square braces is the same, we know that we just passed a - * complete document, therefore the last json buffer location is the end of the - * batch. - */ -simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { - // Variant: do not count separately, just figure out depth - if(parser.n_structural_indexes == 0) { return 0; } - auto arr_cnt = 0; - auto obj_cnt = 0; - for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { - auto idxb = parser.structural_indexes[i]; - switch (parser.buf[idxb]) { - case ':': - case ',': - continue; - case '}': - obj_cnt--; - continue; - case ']': - arr_cnt--; - continue; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - auto idxa = parser.structural_indexes[i - 1]; - switch (parser.buf[idxa]) { - case '{': - case '[': - case ':': - case ',': - continue; + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; } - // Last document is complete, so the next document will appear after! - if (!arr_cnt && !obj_cnt) { - return parser.n_structural_indexes; + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up } - // Last document is incomplete; mark the document at i + 1 as the next one - return i; } - // If we made it to the end, we want to finish counting to see if we have a full document. - switch (parser.buf[parser.structural_indexes[0]]) { - case '}': - obj_cnt--; - break; - case ']': - arr_cnt--; - break; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; } - if (!arr_cnt && !obj_cnt) { - // We have a complete document. - return parser.n_structural_indexes; + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; } - return 0; + d = to_double(mantissa, real_exponent, negative); + return true; } -} // namespace stage1 -} // unnamed namespace -} // namespace haswell -} // namespace simdjson - -#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H -/* end file generic/stage1/find_next_document_index.h for haswell */ -/* including generic/stage1/json_minifier.h for haswell: #include */ -/* begin file generic/stage1/json_minifier.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) - -namespace simdjson { -namespace haswell { -namespace { -namespace stage1 { - -class json_minifier { -public: - template - static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} -private: - simdjson_inline json_minifier(uint8_t *_dst) - : dst{_dst} - {} - template - simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; - simdjson_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); - json_scanner scanner{}; - uint8_t *dst; -}; +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. -simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { - uint64_t mask = block.whitespace(); - dst += in.compress(mask, dst); + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } -simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { - error_code error = scanner.finish(); - if (error) { dst_len = 0; return error; } - dst_len = dst - dst_start; - return SUCCESS; +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); } -template<> -simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - simd::simd8x64 in_2(block_buf+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1); - this->next(in_2, block_2); - reader.advance(); +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; } -template<> -simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - json_block block_1 = scanner.next(in_1); - this->next(block_buf, block_1); - reader.advance(); +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; } -template -error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { - buf_block_reader reader(buf, len); - json_minifier minifier(dst); +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; - // Index the first n-1 blocks - while (reader.has_full_block()) { - minifier.step(reader.full_block(), reader); +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; } - - // Index the last (remainder) block, padded with spaces - uint8_t block[STEP_SIZE]; - size_t remaining_bytes = reader.get_remainder(block); - if (remaining_bytes > 0) { - // We do not want to write directly to the output stream. Rather, we write - // to a local buffer (for safety). - uint8_t out_block[STEP_SIZE]; - uint8_t * const guarded_dst{minifier.dst}; - minifier.dst = out_block; - minifier.step(block, reader); - size_t to_write = minifier.dst - out_block; - // In some cases, we could be enticed to consider the padded spaces - // as part of the string. This is fine as long as we do not write more - // than we consumed. - if(to_write > remaining_bytes) { to_write = remaining_bytes; } - memcpy(guarded_dst, out_block, to_write); - minifier.dst = guarded_dst + to_write; +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); } - return minifier.finish(dst, dst_len); + return SUCCESS; } -} // namespace stage1 -} // unnamed namespace -} // namespace haswell -} // namespace simdjson - -#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H -/* end file generic/stage1/json_minifier.h for haswell */ -/* including generic/stage1/json_structural_indexer.h for haswell: #include */ -/* begin file generic/stage1/json_structural_indexer.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) - -namespace simdjson { -namespace haswell { -namespace { -namespace stage1 { - -class bit_indexer { -public: - uint32_t *tail; +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well - simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. -#if SIMDJSON_PREFER_REVERSE_BITS - /** - * ARM lacks a fast trailing zero instruction, but it has a fast - * bit reversal instruction and a fast leading zero instruction. - * Thus it may be profitable to reverse the bits (once) and then - * to rely on a sequence of instructions that call the leading - * zero instruction. - * - * Performance notes: - * The chosen routine is not optimal in terms of data dependency - * since zero_leading_bit might require two instructions. However, - * it tends to minimize the total number of instructions which is - * beneficial. - */ - simdjson_inline void write_index(uint32_t idx, uint64_t& rev_bits, int i) { - int lz = leading_zeroes(rev_bits); - this->tail[i] = static_cast(idx) + lz; - rev_bits = zero_leading_bit(rev_bits, lz); - } -#else - /** - * Under recent x64 systems, we often have both a fast trailing zero - * instruction and a fast 'clear-lower-bit' instruction so the following - * algorithm can be competitive. - */ + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. - simdjson_inline void write_index(uint32_t idx, uint64_t& bits, int i) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); } -#endif // SIMDJSON_PREFER_REVERSE_BITS + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. - template - simdjson_inline int write_indexes(uint32_t idx, uint64_t& bits) { - write_index(idx, bits, START); - SIMDJSON_IF_CONSTEXPR (N > 1) { - write_indexes<(N-1>0?START+1:START), (N-1>=0?N-1:1)>(idx, bits); - } - return START+N; + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} - template - simdjson_inline int write_indexes_stepped(uint32_t idx, uint64_t& bits, int cnt) { - write_indexes(idx, bits); - SIMDJSON_IF_CONSTEXPR ((START+STEP) < END) { - if (simdjson_unlikely((START+STEP) < cnt)) { - write_indexes_stepped<(START+STEP(idx, bits, cnt); - } - } - return ((END-START) % STEP) == 0 ? END : (END-START) - ((END-START) % STEP) + STEP; +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} - // flatten out values in 'bits' assuming that they are are to have values of idx - // plus their position in the bitvector, and store these indexes at - // base_ptr[base] incrementing base as we go - // will potentially store extra values beyond end of valid bits, so base_ptr - // needs to be large enough to handle this - // - // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it - // will provide its own version of the code. -#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER - simdjson_inline void write(uint32_t idx, uint64_t bits); -#else - simdjson_inline void write(uint32_t idx, uint64_t bits) { - // In some instances, the next branch is expensive because it is mispredicted. - // Unfortunately, in other cases, - // it helps tremendously. - if (bits == 0) - return; - - int cnt = static_cast(count_ones(bits)); - -#if SIMDJSON_PREFER_REVERSE_BITS - bits = reverse_bits(bits); -#endif -#ifdef SIMDJSON_STRUCTURAL_INDEXER_STEP - static constexpr const int STEP = SIMDJSON_STRUCTURAL_INDEXER_STEP; -#else - static constexpr const int STEP = 4; -#endif - static constexpr const int STEP_UNTIL = 24; +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} - write_indexes_stepped<0, STEP_UNTIL, STEP>(idx, bits, cnt); - SIMDJSON_IF_CONSTEXPR (STEP_UNTIL < 64) { - if (simdjson_unlikely(STEP_UNTIL < cnt)) { - for (int i=STEP_UNTIL; itail += cnt; +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; } -#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER - -}; - -class json_structural_indexer { -public: - /** - * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. - * - * @param partial Setting the partial parameter to true allows the find_structural_bits to - * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If - * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. - */ - template - static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; - -private: - simdjson_inline json_structural_indexer(uint32_t *structural_indexes); - template - simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; - simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); - - json_scanner scanner{}; - utf8_checker checker{}; - bit_indexer indexer; - uint64_t prev_structurals = 0; - uint64_t unescaped_chars_error = 0; -}; - -simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + return INVALID_NUMBER(src); +} -// Skip the last character if it is partial -simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { - if (simdjson_unlikely(len < 3)) { - switch (len) { - case 2: - if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left - return len; - case 1: - if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - return len; - case 0: - return len; +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); } } - if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left - return len; + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; } +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing // -// PERF NOTES: -// We pipe 2 inputs through these stages: -// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load -// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. -// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. -// The output of step 1 depends entirely on this information. These functions don't quite use -// up enough CPU: the second half of the functions is highly serial, only using 1 execution core -// at a time. The second input's scans has some dependency on the first ones finishing it, but -// they can make a lot of progress before they need that information. -// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that -// to finish: utf-8 checks and generating the output from the last iteration. -// -// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all -// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough -// workout. +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. // -template -error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { - if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } - // We guard the rest of the code so that we can assume that len > 0 throughout. - if (len == 0) { return EMPTY; } - if (is_streaming(partial)) { - len = trim_partial_utf8(buf, len); - // If you end up with an empty window after trimming - // the partial UTF-8 bytes, then chances are good that you - // have an UTF-8 formatting error. - if(len == 0) { return UTF8_ERROR; } - } - buf_block_reader reader(buf, len); - json_structural_indexer indexer(parser.structural_indexes.get()); +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); - // Read all but the last block - while (reader.has_full_block()) { - indexer.step(reader.full_block(), reader); - } - // Take care of the last block (will always be there unless file is empty which is - // not supposed to happen.) - uint8_t block[STEP_SIZE]; - if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } - indexer.step(block, reader); - return indexer.finish(parser, reader.block_index(), len, partial); -} +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING -template<> -simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block); - simd::simd8x64 in_2(block+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1, reader.block_index()); - this->next(in_2, block_2, reader.block_index()+64); - reader.advance(); +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds } -template<> -simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block); - json_block block_1 = scanner.next(in_1); - this->next(in_1, block_1, reader.block_index()); - reader.advance(); -} +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else -simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { - uint64_t unescaped = in.lteq(0x1F); -#if SIMDJSON_UTF8VALIDATION - checker.check_next_input(in); -#endif - indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser - prev_structurals = block.structural_start(); - unescaped_chars_error |= block.non_quote_inside_string(unescaped); -} +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); -simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { - // Write out the final iteration's structurals - indexer.write(uint32_t(idx-64), prev_structurals); - error_code error = scanner.finish(); - // We deliberately break down the next expression so that it is - // human readable. - const bool should_we_exit = is_streaming(partial) ? - ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING - : (error != SUCCESS); // if partial is false, we must have SUCCESS - const bool have_unclosed_string = (error == UNCLOSED_STRING); - if (simdjson_unlikely(should_we_exit)) { return error; } + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } - if (unescaped_chars_error) { - return UNESCAPED_CHARS; + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows } - parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); - /*** - * The On Demand API requires special padding. - * - * This is related to https://github.com/simdjson/simdjson/issues/906 - * Basically, we want to make sure that if the parsing continues beyond the last (valid) - * structural character, it quickly stops. - * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. - * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing - * continues, then it must be [,] or }. - * Suppose it is ] or }. We backtrack to the first character, what could it be that would - * not trigger an error? It could be ] or } but no, because you can't start a document that way. - * It can't be a comma, a colon or any simple value. So the only way we could continue is - * if the repeated character is [. But if so, the document must start with [. But if the document - * starts with [, it should end with ]. If we enforce that rule, then we would get - * ][[ which is invalid. - * - * This is illustrated with the test array_iterate_unclosed_error() on the following input: - * R"({ "a": [,,)" - **/ - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final - parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); - parser.structural_indexes[parser.n_structural_indexes + 2] = 0; - parser.next_structural_index = 0; - // a valid JSON file cannot have zero structural indexes - we should have found something - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { - return EMPTY; + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); } - if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { - return UNEXPECTED_ERROR; + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; } - if (partial == stage1_mode::streaming_partial) { - // If we have an unclosed string, then the last structural - // will be the quote and we want to make sure to omit it. - if(have_unclosed_string) { - parser.n_structural_indexes--; - // a valid JSON file cannot have zero structural indexes - we should have found something - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } - } - // We truncate the input to the end of the last complete document (or zero). - auto new_structural_indexes = find_next_document_index(parser); - if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { - if(parser.structural_indexes[0] == 0) { - // If the buffer is partial and we started at index 0 but the document is - // incomplete, it's too big to parse. - return CAPACITY; - } else { - // It is possible that the document could be parsed, we just had a lot - // of white space. - parser.n_structural_indexes = 0; - return EMPTY; - } - } - parser.n_structural_indexes = new_structural_indexes; - } else if (partial == stage1_mode::streaming_final) { - if(have_unclosed_string) { parser.n_structural_indexes--; } - // We truncate the input to the end of the last complete document (or zero). - // Because partial == stage1_mode::streaming_final, it means that we may - // silently ignore trailing garbage. Though it sounds bad, we do it - // deliberately because many people who have streams of JSON documents - // will truncate them for processing. E.g., imagine that you are uncompressing - // the data from a size file or receiving it in chunks from the network. You - // may not know where exactly the last document will be. Meanwhile the - // document_stream instances allow people to know the JSON documents they are - // parsing (see the iterator.source() method). - parser.n_structural_indexes = find_next_document_index(parser); - // We store the initial n_structural_indexes so that the client can see - // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, - // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, - // otherwise, it will copy some prior index. - parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; - // This next line is critical, do not change it unless you understand what you are - // doing. - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { - // We tolerate an unclosed string at the very end of the stream. Indeed, users - // often load their data in bulk without being careful and they want us to ignore - // the trailing garbage. - return EMPTY; - } + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } } - checker.check_eof(); - return checker.errors(); + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; } -} // namespace stage1 -} // unnamed namespace -} // namespace haswell -} // namespace simdjson +// Inlineable functions +namespace { -// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. -#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); -#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H -/* end file generic/stage1/json_structural_indexer.h for haswell */ -/* including generic/stage1/utf8_validator.h for haswell: #include */ -/* begin file generic/stage1/utf8_validator.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } -namespace simdjson { -namespace haswell { -namespace { -namespace stage1 { + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } -/** - * Validates that the string is actual UTF-8. - */ -template -bool generic_validate_utf8(const uint8_t * input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - return c.errors() == error_code::SUCCESS; -} + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } -bool generic_validate_utf8(const char * input, size_t length) { - return generic_validate_utf8(reinterpret_cast(input),length); + return i; } -} // namespace stage1 -} // unnamed namespace -} // namespace haswell -} // namespace simdjson - -#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H -/* end file generic/stage1/utf8_validator.h for haswell */ -/* end file generic/stage1/amalgamated.h for haswell */ -/* including generic/stage2/amalgamated.h for haswell: #include */ -/* begin file generic/stage2/amalgamated.h for haswell */ -// Stuff other things depend on -/* including generic/stage2/base.h for haswell: #include */ -/* begin file generic/stage2/base.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } -namespace simdjson { -namespace haswell { -namespace { -namespace stage2 { + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } -class json_iterator; -class structural_iterator; -struct tape_builder; -struct tape_writer; + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } -} // namespace stage2 -} // unnamed namespace -} // namespace haswell -} // namespace simdjson + return i; +} -#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H -/* end file generic/stage2/base.h for haswell */ -/* including generic/stage2/tape_writer.h for haswell: #include */ -/* begin file generic/stage2/tape_writer.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } -#include + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } -namespace simdjson { -namespace haswell { -namespace { -namespace stage2 { + return i; +} -struct tape_writer { - /** The next place to write to tape */ - uint64_t *next_tape_loc; +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); - /** Write a signed 64-bit value to tape. */ - simdjson_inline void append_s64(int64_t value) noexcept; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } - /** Write an unsigned 64-bit value to tape. */ - simdjson_inline void append_u64(uint64_t value) noexcept; + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} - /** Write a double value to tape. */ - simdjson_inline void append_double(double value) noexcept; +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); - /** - * Append a tape entry (an 8-bit type,and 56 bits worth of value). - */ - simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } - /** - * Skip the current tape entry without writing. - * - * Used to skip the start of the container, since we'll come back later to fill it in when the - * container ends. - */ - simdjson_inline void skip() noexcept; + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} - /** - * Skip the number of tape entries necessary to write a large u64 or i64. - */ - simdjson_inline void skip_large_integer() noexcept; +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; - /** - * Skip the number of tape entries necessary to write a double. - */ - simdjson_inline void skip_double() noexcept; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } - /** - * Write a value to a known location on tape. - * - * Used to go back and write out the start of a container after the container ends. - */ - simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} -private: - /** - * Append both the tape entry, and a supplementary value following it. Used for types that need - * all 64 bits, such as double and uint64_t. - */ - template - simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; -}; // struct tape_writer +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); -simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { - append2(0, value, internal::tape_type::INT64); -} + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } -simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { - append(0, internal::tape_type::UINT64); - *next_tape_loc = value; - next_tape_loc++; -} + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); -/** Write a double value to tape. */ -simdjson_inline void tape_writer::append_double(double value) noexcept { - append2(0, value, internal::tape_type::DOUBLE); -} + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } -simdjson_inline void tape_writer::skip() noexcept { - next_tape_loc++; -} + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; -simdjson_inline void tape_writer::skip_large_integer() noexcept { - next_tape_loc += 2; -} + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } -simdjson_inline void tape_writer::skip_double() noexcept { - next_tape_loc += 2; + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; } -simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { - *next_tape_loc = val | ((uint64_t(char(t))) << 56); - next_tape_loc++; +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); } -template -simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { - append(val, t); - static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); - memcpy(next_tape_loc, &val2, sizeof(val2)); - next_tape_loc++; +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; } -simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { - tape_loc = val | ((uint64_t(char(t))) << 56); +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). + static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; } -} // namespace stage2 -} // unnamed namespace -} // namespace haswell -} // namespace simdjson +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); -#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H -/* end file generic/stage2/tape_writer.h for haswell */ -/* including generic/stage2/logger.h for haswell: #include */ -/* begin file generic/stage2/logger.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); -#include + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; -// This is for an internal-only stage 2 specific logger. -// Set LOG_ENABLED = true to log what stage 2 is doing! -namespace simdjson { -namespace haswell { -namespace { -namespace logger { + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + exponent += exp_neg ? 0-exp : exp; + } -#if SIMDJSON_VERBOSE_LOGGING - static constexpr const bool LOG_ENABLED = true; -#else - static constexpr const bool LOG_ENABLED = false; -#endif - static constexpr const int LOG_EVENT_LEN = 20; - static constexpr const int LOG_BUFFER_LEN = 30; - static constexpr const int LOG_SMALL_BUFFER_LEN = 10; - static constexpr const int LOG_INDEX_LEN = 5; + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - static int log_depth; // Not threadsafe. Log only. + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - // Helper to turn unprintable or newline characters into spaces - static simdjson_inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; } + return d; +} - // Print the header and set up log_start - static simdjson_inline void log_start() { - if (LOG_ENABLED) { - log_depth = 0; - printf("\n"); - printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); - printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; } + } else { + overflow = p-src > 19; } - simdjson_unused static simdjson_inline void log_string(const char *message) { - if (LOG_ENABLED) { - printf("%s\n", message); - } + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; } - // Logs a single line from the stage 2 DOM parser - template - static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { - if (LOG_ENABLED) { - printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); - auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; - auto next_index = structurals.next_structural; - auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); - auto next = &structurals.buf[*next_index]; - { - // Print the next N characters in the buffer. - printf("| "); - // Otherwise, print the characters starting from the buffer position. - // Print spaces for unprintable or newline characters. - for (int i=0;i simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} -} // namespace logger } // unnamed namespace -} // namespace haswell +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H -/* end file generic/stage2/logger.h for haswell */ +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for icelake */ -// All other declarations -/* including generic/stage2/json_iterator.h for haswell: #include */ -/* begin file generic/stage2/json_iterator.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for icelake: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { -namespace { -namespace stage2 { - -class json_iterator { -public: - const uint8_t* const buf; - uint32_t *next_structural; - dom_parser_implementation &dom_parser; - uint32_t depth{0}; - - /** - * Walk the JSON document. - * - * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as - * the first parameter; some callbacks have other parameters as well: - * - * - visit_document_start() - at the beginning. - * - visit_document_end() - at the end (if things were successful). - * - * - visit_array_start() - at the start `[` of a non-empty array. - * - visit_array_end() - at the end `]` of a non-empty array. - * - visit_empty_array() - when an empty array is encountered. - * - * - visit_object_end() - at the start `]` of a non-empty object. - * - visit_object_start() - at the end `]` of a non-empty object. - * - visit_empty_object() - when an empty object is encountered. - * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is - * guaranteed to point at the first quote of the string (`"key"`). - * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. - * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. - * - * - increment_count(iter) - each time a value is found in an array or object. - */ - template - simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; - - /** - * Create an iterator capable of walking a JSON document. - * - * The document must have already passed through stage 1. - */ - simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); - - /** - * Look at the next token. - * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_inline const uint8_t *peek() const noexcept; - /** - * Advance to the next token. - * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_inline const uint8_t *advance() noexcept; - /** - * Get the remaining length of the document, from the start of the current token. - */ - simdjson_inline size_t remaining_len() const noexcept; - /** - * Check if we are at the end of the document. - * - * If this is true, there are no more tokens. - */ - simdjson_inline bool at_eof() const noexcept; - /** - * Check if we are at the beginning of the document. - */ - simdjson_inline bool at_beginning() const noexcept; - simdjson_inline uint8_t last_structural() const noexcept; - - /** - * Log that a value has been found. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_inline void log_value(const char *type) const noexcept; - /** - * Log the start of a multipart value. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_inline void log_start_value(const char *type) const noexcept; - /** - * Log the end of a multipart value. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_inline void log_end_value(const char *type) const noexcept; - /** - * Log an error. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_inline void log_error(const char *error) const noexcept; - - template - simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; - template - simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; -}; - -template -simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { - logger::log_start(); - - // - // Start the document - // - if (at_eof()) { return EMPTY; } - log_start_value("document"); - SIMDJSON_TRY( visitor.visit_document_start(*this) ); - - // - // Read first value - // - { - auto value = advance(); - - // Make sure the outer object or array is closed before continuing; otherwise, there are ways we - // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 - if (!STREAMING) { - switch (*value) { - case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; - case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; - } - } - - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; - } - } - goto document_end; - -// -// Object parser states -// -object_begin: - log_start_value("object"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = false; - SIMDJSON_TRY( visitor.visit_object_start(*this) ); - - { - auto key = advance(); - if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } - SIMDJSON_TRY( visitor.increment_count(*this) ); - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - -object_field: - if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } - -object_continue: - switch (*advance()) { - case ',': - SIMDJSON_TRY( visitor.increment_count(*this) ); - { - auto key = advance(); - if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - goto object_field; - case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; - default: log_error("No comma between object fields"); return TAPE_ERROR; - } - -scope_end: - depth--; - if (depth == 0) { goto document_end; } - if (dom_parser.is_array[depth]) { goto array_continue; } - goto object_continue; +namespace icelake { // -// Array parser states +// internal::implementation_simdjson_result_base inline implementation // -array_begin: - log_start_value("array"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = true; - SIMDJSON_TRY( visitor.visit_array_start(*this) ); - SIMDJSON_TRY( visitor.increment_count(*this) ); - -array_value: - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } -array_continue: - switch (*advance()) { - case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; - case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; - default: log_error("Missing comma between array values"); return TAPE_ERROR; +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; } +} -document_end: - log_end_value("document"); - SIMDJSON_TRY( visitor.visit_document_end(*this) ); - - dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); - - // If we didn't make it to the end, it's an error - if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { - log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); - return TAPE_ERROR; - } +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} - return SUCCESS; +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} -} // walk_document() +#if SIMDJSON_EXCEPTIONS -simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) - : buf{_dom_parser.buf}, - next_structural{&_dom_parser.structural_indexes[start_structural_index]}, - dom_parser{_dom_parser} { +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; } -simdjson_inline const uint8_t *json_iterator::peek() const noexcept { - return &buf[*(next_structural)]; -} -simdjson_inline const uint8_t *json_iterator::advance() noexcept { - return &buf[*(next_structural++)]; -} -simdjson_inline size_t json_iterator::remaining_len() const noexcept { - return dom_parser.len - *(next_structural-1); +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); } -simdjson_inline bool json_iterator::at_eof() const noexcept { - return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; -} -simdjson_inline bool json_iterator::at_beginning() const noexcept { - return next_structural == dom_parser.structural_indexes.get(); -} -simdjson_inline uint8_t json_iterator::last_structural() const noexcept { - return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); } -simdjson_inline void json_iterator::log_value(const char *type) const noexcept { - logger::log_line(*this, "", type, ""); +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); } -simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { - logger::log_line(*this, "+", type, ""); - if (logger::LOG_ENABLED) { logger::log_depth++; } -} +#endif // SIMDJSON_EXCEPTIONS -simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { - if (logger::LOG_ENABLED) { logger::log_depth--; } - logger::log_line(*this, "-", type, ""); +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; } -simdjson_inline void json_iterator::log_error(const char *error) const noexcept { - logger::log_line(*this, "", "ERROR", error); +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; } -template -simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { - switch (*value) { - case '"': return visitor.visit_root_string(*this, value); - case 't': return visitor.visit_root_true_atom(*this, value); - case 'f': return visitor.visit_root_false_atom(*this, value); - case 'n': return visitor.visit_root_null_atom(*this, value); - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return visitor.visit_root_number(*this, value); - default: - log_error("Document starts with a non-value character"); - return TAPE_ERROR; - } -} -template -simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { - // Use the fact that most scalars are going to be either strings or numbers. - if(*value == '"') { - return visitor.visit_string(*this, value); - } else if (((*value - '0') < 10) || (*value == '-')) { - return visitor.visit_number(*this, value); - } - // true, false, null are uncommon. - switch (*value) { - case 't': return visitor.visit_true_atom(*this, value); - case 'f': return visitor.visit_false_atom(*this, value); - case 'n': return visitor.visit_null_atom(*this, value); - default: - log_error("Non-value found when value was expected!"); - return TAPE_ERROR; - } +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); } -} // namespace stage2 -} // unnamed namespace -} // namespace haswell -} // namespace simdjson +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} -#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H -/* end file generic/stage2/json_iterator.h for haswell */ -/* including generic/stage2/stringparsing.h for haswell: #include */ -/* begin file generic/stage2/stringparsing.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +} // namespace icelake +} // namespace simdjson +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for icelake */ +/* end file simdjson/generic/amalgamated.h for icelake */ +/* including simdjson/icelake/end.h: #include "simdjson/icelake/end.h" */ +/* begin file simdjson/icelake/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// This file contains the common code every implementation uses -// It is intended to be included multiple times and compiled multiple times +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_UNTARGET_REGION +#endif -namespace simdjson { -namespace haswell { -namespace { -/// @private -namespace stringparsing { +/* undefining SIMDJSON_IMPLEMENTATION from "icelake" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/icelake/end.h */ -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +#endif // SIMDJSON_ICELAKE_H +/* end file simdjson/icelake.h */ +/* including simdjson/icelake/implementation.h: #include */ +/* begin file simdjson/icelake/implementation.h */ +#ifndef SIMDJSON_ICELAKE_IMPLEMENTATION_H +#define SIMDJSON_ICELAKE_IMPLEMENTATION_H - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +namespace icelake { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "icelake", + "Intel/AMD AVX512", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512F | internal::instruction_set::AVX512DQ | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; }; -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr, bool allow_replacement) { - // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) - constexpr uint32_t substitution_code_point = 0xfffd; - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; +} // namespace icelake +} // namespace simdjson - // If we found a high surrogate, we must - // check for low surrogate for characters - // outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - const uint8_t *src_data = *src_ptr; - /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ - if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { - if(!allow_replacement) { return false; } - code_point = substitution_code_point; - } else { - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); +#endif // SIMDJSON_ICELAKE_IMPLEMENTATION_H +/* end file simdjson/icelake/implementation.h */ - // We have already checked that the high surrogate is valid and - // (code_point - 0xd800) < 1024. - // - // Check that code_point_2 is in the range 0xdc00..0xdfff - // and that code_point_2 was parsed from valid hex. - uint32_t low_bit = code_point_2 - 0xdc00; - if (low_bit >> 10) { - if(!allow_replacement) { return false; } - code_point = substitution_code_point; - } else { - code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; - *src_ptr += 6; - } +// defining SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER allows us to provide our own bit_indexer::write +#define SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER - } - } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { - // If we encounter a low surrogate (not preceded by a high surrogate) - // then we have an error. - if(!allow_replacement) { return false; } - code_point = substitution_code_point; - } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} +/* including simdjson/icelake/begin.h: #include */ +/* begin file simdjson/icelake/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "icelake" */ +#define SIMDJSON_IMPLEMENTATION icelake +/* including simdjson/icelake/base.h: #include "simdjson/icelake/base.h" */ +/* begin file simdjson/icelake/base.h */ +#ifndef SIMDJSON_ICELAKE_BASE_H +#define SIMDJSON_ICELAKE_BASE_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// handle a unicode codepoint using the wobbly convention -// https://simonsapin.github.io/wtf-8/ -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // It is not ideal that this function is nearly identical to handle_unicode_codepoint. - // - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // If we found a high surrogate, we must - // check for low surrogate for characters - // outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - const uint8_t *src_data = *src_ptr; - /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ - if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); - uint32_t low_bit = code_point_2 - 0xdc00; - if ((low_bit >> 10) == 0) { - code_point = - (((code_point - 0xd800) << 10) | low_bit) + 0x10000; - *src_ptr += 6; - } - } - } +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +/** + * Implementation for Icelake (Intel AVX512). + */ +namespace icelake { - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} +class implementation; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BASE_H +/* end file simdjson/icelake/base.h */ +/* including simdjson/icelake/intrinsics.h: #include "simdjson/icelake/intrinsics.h" */ +/* begin file simdjson/icelake/intrinsics.h */ +#ifndef SIMDJSON_ICELAKE_INTRINSICS_H +#define SIMDJSON_ICELAKE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO +#if SIMDJSON_CLANG_VISUAL_STUDIO /** - * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There - * must be an unescaped quote terminating the string. It returns the final output - * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large - * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + - * SIMDJSON_PADDING bytes. + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. */ -simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); - } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. */ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; - } - } +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// Important: we need the AVX-512 headers: +#include +#include +#include +#include +#include +#include +#include +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); + +#endif // SIMDJSON_ICELAKE_INTRINSICS_H +/* end file simdjson/icelake/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt,popcnt") +#endif + +/* including simdjson/icelake/bitmanipulation.h: #include "simdjson/icelake/bitmanipulation.h" */ +/* begin file simdjson/icelake/bitmanipulation.h */ +#ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H +#define SIMDJSON_ICELAKE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO } -simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { - // It is not ideal that this function is nearly identical to parse_string. - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); - } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint_wobbly(&src, &dst)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. */ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; - } - } +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif } -} // namespace stringparsing } // unnamed namespace -} // namespace haswell +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H -/* end file generic/stage2/stringparsing.h for haswell */ -/* including generic/stage2/structural_iterator.h for haswell: #include */ -/* begin file generic/stage2/structural_iterator.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H +/* end file simdjson/icelake/bitmanipulation.h */ +/* including simdjson/icelake/bitmask.h: #include "simdjson/icelake/bitmask.h" */ +/* begin file simdjson/icelake/bitmask.h */ +#ifndef SIMDJSON_ICELAKE_BITMASK_H +#define SIMDJSON_ICELAKE_BITMASK_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace icelake { namespace { -namespace stage2 { - -class structural_iterator { -public: - const uint8_t* const buf; - uint32_t *next_structural; - dom_parser_implementation &dom_parser; - - // Start a structural - simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) - : buf{_dom_parser.buf}, - next_structural{&_dom_parser.structural_indexes[start_structural_index]}, - dom_parser{_dom_parser} { - } - // Get the buffer position of the current structural character - simdjson_inline const uint8_t* current() { - return &buf[*(next_structural-1)]; - } - // Get the current structural character - simdjson_inline char current_char() { - return buf[*(next_structural-1)]; - } - // Get the next structural character without advancing - simdjson_inline char peek_next_char() { - return buf[*next_structural]; - } - simdjson_inline const uint8_t* peek() { - return &buf[*next_structural]; - } - simdjson_inline const uint8_t* advance() { - return &buf[*(next_structural++)]; - } - simdjson_inline char advance_char() { - return buf[*(next_structural++)]; - } - simdjson_inline size_t remaining_len() { - return dom_parser.len - *(next_structural-1); - } - simdjson_inline bool at_end() { - return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; - } - simdjson_inline bool at_beginning() { - return next_structural == dom_parser.structural_indexes.get(); - } -}; +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} -} // namespace stage2 } // unnamed namespace -} // namespace haswell +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H -/* end file generic/stage2/structural_iterator.h for haswell */ -/* including generic/stage2/tape_builder.h for haswell: #include */ -/* begin file generic/stage2/tape_builder.h for haswell */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +#endif // SIMDJSON_ICELAKE_BITMASK_H +/* end file simdjson/icelake/bitmask.h */ +/* including simdjson/icelake/simd.h: #include "simdjson/icelake/simd.h" */ +/* begin file simdjson/icelake/simd.h */ +#ifndef SIMDJSON_ICELAKE_SIMD_H +#define SIMDJSON_ICELAKE_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if defined(__GNUC__) && !defined(__clang__) +#if __GNUC__ == 8 +#define SIMDJSON_GCC8 1 +#endif // __GNUC__ == 8 +#endif // defined(__GNUC__) && !defined(__clang__) + +#if SIMDJSON_GCC8 +/** + * GCC 8 fails to provide _mm512_set_epi8. We roll our own. + */ +inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { + return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), + uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), + uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), + uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), + uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), + uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), + uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), + uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); +} +#endif // SIMDJSON_GCC8 + + namespace simdjson { -namespace haswell { +namespace icelake { namespace { -namespace stage2 { +namespace simd { -struct tape_builder { - template - simdjson_warn_unused static simdjson_inline error_code parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept; + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m512i value; - /** Called when a non-empty document starts. */ - simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; - /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + // Zero constructor + simdjson_inline base() : value{__m512i()} {} - /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; - /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; - /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + // Conversion from SIMD register + simdjson_inline base(const __m512i _value) : value(_value) {} - /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; - /** - * Called when a key in a field is encountered. - * - * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array - * will be called after this with the field value. - */ - simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; - /** Called when a non-empty object ends. */ - simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; - /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + // Conversion to SIMD register + simdjson_inline operator const __m512i&() const { return this->value; } + simdjson_inline operator __m512i&() { return this->value; } - /** - * Called when a string, number, boolean or null is found. - */ - simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; - /** - * Called when a string, number, boolean or null is found at the top level of a document (i.e. - * when there is no array or object and the entire document is a single string, number, boolean or - * null. - * - * This is separate from primitive() because simdjson's normal primitive parsing routines assume - * there is at least one more token after the value, which is only true in an array or object. - */ - simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; - simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + // Forward-declared so they can be used by splat and friends. + template + struct simd8; - simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; - /** Called each time a new field or element in an array or object is found. */ - simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m512i _value) : base>(_value) {} - /** Next location to write to tape */ - tape_writer tape; -private: - /** Next write location in the string buf for stage 2 parsing */ - uint8_t *current_string_buf_loc; + friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { + return _mm512_cmpeq_epi8_mask(lhs, rhs); + } - simdjson_inline tape_builder(dom::document &doc) noexcept; + static const int SIZE = sizeof(base::value); - simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; - simdjson_inline void start_container(json_iterator &iter) noexcept; - simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; - simdjson_inline void on_end_string(uint8_t *dst) noexcept; -}; // struct tape_builder + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) + constexpr int shift = 16 - N; + return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); + } + }; -template -simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept { - dom_parser.doc = &doc; - json_iterator iter(dom_parser, STREAMING ? dom_parser.next_structural_index : 0); - tape_builder builder(doc); - return iter.walk_document(builder); -} + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_root_primitive(*this, value); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_primitive(*this, value); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m512i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } + static simdjson_inline simd8 load(const T values[64]) { + return _mm512_loadu_si512(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { - constexpr uint32_t start_tape_index = 0; - tape.append(start_tape_index, internal::tape_type::ROOT); - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); - return SUCCESS; -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { - return visit_string(iter, key, true); -} + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m512i _value) : base8(_value) {} -simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 - return SUCCESS; -} + // Store to array + simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } -simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { - iter.log_value(key ? "key" : "string"); - uint8_t *dst = on_start_string(iter); - dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. - if (dst == nullptr) { - iter.log_error("Invalid escape in string"); - return STRING_ERROR; - } - on_end_string(dst); - return SUCCESS; -} + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { - return visit_string(iter, value); -} + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm512_shuffle_epi8(lookup_table, *this); + } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("number"); - return numberparsing::parse_number(value, tape); -} + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint64_t mask, L * output) const { + // we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability + // (AMD Zen4 has terrible performance with it, it is effectively broken) + // _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + __m512i compressed = _mm512_maskz_compress_epi8(~mask, *this); + _mm512_storeu_si512(output, compressed); // could use a mask + } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { - // - // We need to make a copy to make sure that the string is space terminated. - // This is not about padding the input, which should already padded up - // to len + SIMDJSON_PADDING. However, we have no control at this stage - // on how the padding was done. What if the input string was padded with nulls? - // It is quite common for an input string to have an extra null character (C string). - // We do not want to allow 9\0 (where \0 is the null character) inside a JSON - // document, but the string "9\0" by itself is fine. So we make a copy and - // pad the input with spaces when we know that there is just one input element. - // This copy is relatively expensive, but it will almost never be called in - // practice unless you are in the strange scenario where you have many JSON - // documents made of single atoms. - // - std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); - if (copy.get() == nullptr) { return MEMALLOC; } - std::memcpy(copy.get(), value, iter.remaining_len()); - std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); - error_code error = visit_number(iter, copy.get()); - return error; -} + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, + int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, + int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, + int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, + int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } - tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } - tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} + simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } + }; -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, + uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, + uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, + uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, + uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -// private: + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } -simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { - return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); -} + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } -simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - auto start_index = next_tape_index(iter); - tape.append(start_index+2, start); - tape.append(start_index, end); - return SUCCESS; -} + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } -simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); - iter.dom_parser.open_containers[iter.depth].count = 0; - tape.skip(); // We don't actually *write* the start element until the end. -} + simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { + return !_mm512_test_epi8_mask(*this, *this); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } + }; -simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - // Write the ending tape element, pointing at the start location - const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; - tape.append(start_tape_index, end); - // Write the start tape element, pointing at the end location (and including count) - // count can overflow if it exceeds 24 bits... so we saturate - // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). - const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; - const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); - return SUCCESS; -} + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; -simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { - // we advance the point, accounting for the fact that we have a NULL termination - tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); - return current_string_buf_loc + sizeof(uint32_t); -} + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed -simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { - uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); - // TODO check for overflow in case someone has a crazy string (>=4GB?) - // But only add the overflow check when the document itself exceeds 4GB - // Currently unneeded because we refuse to parse docs larger or equal to 4GB. - memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); - // NULL termination is still handy if you expect all your strings to - // be NULL terminated? It comes at a small cost - *dst = 0; - current_string_buf_loc = dst + 1; -} + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(mask, output); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + } + + simdjson_inline simd8 reduce_or() const { + return this->chunks[0]; + } + + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] == mask; + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return this->chunks[0] == other.chunks[0]; + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] <= mask; + } + }; // struct simd8x64 + +} // namespace simd -} // namespace stage2 } // unnamed namespace -} // namespace haswell +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H -/* end file generic/stage2/tape_builder.h for haswell */ -/* end file generic/stage2/amalgamated.h for haswell */ +#endif // SIMDJSON_ICELAKE_SIMD_H +/* end file simdjson/icelake/simd.h */ +/* including simdjson/icelake/stringparsing_defs.h: #include "simdjson/icelake/stringparsing_defs.h" */ +/* begin file simdjson/icelake/stringparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +#define SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H -// -// Stage 1 -// +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { - -simdjson_warn_unused error_code implementation::create_dom_parser_implementation( - size_t capacity, - size_t max_depth, - std::unique_ptr& dst -) const noexcept { - dst.reset( new (std::nothrow) dom_parser_implementation() ); - if (!dst) { return MEMALLOC; } - if (auto err = dst->set_capacity(capacity)) - return err; - if (auto err = dst->set_max_depth(max_depth)) - return err; - return SUCCESS; -} - +namespace icelake { namespace { using namespace simd; -// This identifies structural characters (comma, colon, braces, brackets), -// and ASCII white-space ('\r','\n','\t',' '). -simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { - // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why - // we can't use the generic lookup_16. - const auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 64; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - // The 6 operators (:,[]{}) have these values: - // - // , 2C - // : 3A - // [ 5B - // { 7B - // ] 5D - // } 7D - // - // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. - // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then - // match it (against | 0x20). - // - // To prevent recognizing other characters, everything else gets compared with 0, which cannot - // match due to the | 0x20. - // - // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , - // and :. This gets caught in stage 2, which checks the actual character to ensure the right - // operators are in the right places. - const auto op_table = simd8::repeat_16( - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B - ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D - ); + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } - // We compute whitespace and op separately. If later code only uses one or the - // other, given the fact that all functions are aggressively inlined, we can - // hope that useless computations will be omitted. This is namely case when - // minifying (we only need whitespace). + uint64_t bs_bits; + uint64_t quote_bits; +}; // struct backslash_and_quote - const uint64_t whitespace = in.eq({ - _mm256_shuffle_epi8(whitespace_table, in.chunks[0]), - _mm256_shuffle_epi8(whitespace_table, in.chunks[1]) - }); - // Turn [ and ] into { and } - const simd8x64 curlified{ - in.chunks[0] | 0x20, - in.chunks[1] | 0x20 +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast(v == '\\'), // bs_bits + static_cast(v == '"'), // quote_bits }; - const uint64_t op = curlified.eq({ - _mm256_shuffle_epi8(op_table, in.chunks[0]), - _mm256_shuffle_epi8(op_table, in.chunks[1]) - }); - - return { whitespace, op }; } -simdjson_inline bool is_ascii(const simd8x64& input) { - return input.reduce_or().is_ascii(); -} - -simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 - simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. - return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); -} +} // unnamed namespace +} // namespace icelake +} // namespace simdjson -simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80 - simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80 - return is_third_byte | is_fourth_byte; -} - -} // unnamed namespace -} // namespace haswell -} // namespace simdjson +#endif // SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +/* end file simdjson/icelake/stringparsing_defs.h */ +/* including simdjson/icelake/numberparsing_defs.h: #include "simdjson/icelake/numberparsing_defs.h" */ +/* begin file simdjson/icelake/numberparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +#define SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H -// -// Stage 2 -// +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// -// Implementation-specific overrides -// namespace simdjson { -namespace haswell { - -simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { - return haswell::stage1::json_minifier::minify<128>(buf, len, dst, dst_len); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { - this->buf = _buf; - this->len = _len; - return haswell::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming); -} - -simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return haswell::stage1::generic_validate_utf8(buf,len); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} - -simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { - return haswell::stringparsing::parse_string(src, dst, replacement_char); -} +namespace icelake { +namespace numberparsing { -simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { - return haswell::stringparsing::parse_wobbly_string(src, dst); +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest } -simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { - auto error = stage1(_buf, _len, stage1_mode::regular); - if (error) { return error; } - return stage2(_doc); +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; } -} // namespace haswell +} // namespace numberparsing +} // namespace icelake } // namespace simdjson -/* including simdjson/haswell/end.h: #include */ -/* begin file simdjson/haswell/end.h */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#define SIMDJSON_SWAR_NUMBER_PARSING 1 -#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL -SIMDJSON_UNTARGET_REGION +#endif // SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +/* end file simdjson/icelake/numberparsing_defs.h */ +/* end file simdjson/icelake/begin.h */ +/* including generic/amalgamated.h for icelake: #include */ +/* begin file generic/amalgamated.h for icelake */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) +#error generic/dependencies.h must be included before generic/amalgamated.h! #endif -/* undefining SIMDJSON_IMPLEMENTATION from "haswell" */ -#undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/haswell/end.h */ - -#endif // SIMDJSON_SRC_HASWELL_CPP -/* end file haswell.cpp */ -#endif -#if SIMDJSON_IMPLEMENTATION_ICELAKE -/* including icelake.cpp: #include */ -/* begin file icelake.cpp */ -#ifndef SIMDJSON_SRC_ICELAKE_CPP -#define SIMDJSON_SRC_ICELAKE_CPP +/* including generic/base.h for icelake: #include */ +/* begin file generic/base.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ /* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -/* including simdjson/icelake.h: #include */ -/* begin file simdjson/icelake.h */ -#ifndef SIMDJSON_ICELAKE_H -#define SIMDJSON_ICELAKE_H - -/* including simdjson/icelake/begin.h: #include "simdjson/icelake/begin.h" */ -/* begin file simdjson/icelake/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "icelake" */ -#define SIMDJSON_IMPLEMENTATION icelake -/* including simdjson/icelake/base.h: #include "simdjson/icelake/base.h" */ -/* begin file simdjson/icelake/base.h */ -#ifndef SIMDJSON_ICELAKE_BASE_H -#define SIMDJSON_ICELAKE_BASE_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE namespace simdjson { -/** - * Implementation for Icelake (Intel AVX512). - */ namespace icelake { +namespace { -class implementation; +struct json_character_block; +} // unnamed namespace } // namespace icelake } // namespace simdjson -#endif // SIMDJSON_ICELAKE_BASE_H -/* end file simdjson/icelake/base.h */ -/* including simdjson/icelake/intrinsics.h: #include "simdjson/icelake/intrinsics.h" */ -/* begin file simdjson/icelake/intrinsics.h */ -#ifndef SIMDJSON_ICELAKE_INTRINSICS_H -#define SIMDJSON_ICELAKE_INTRINSICS_H +#endif // SIMDJSON_SRC_GENERIC_BASE_H +/* end file generic/base.h for icelake */ +/* including generic/dom_parser_implementation.h for icelake: #include */ +/* begin file generic/dom_parser_implementation.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#if SIMDJSON_VISUAL_STUDIO -// under clang within visual studio, this will include -#include // visual studio or clang -#else -#include // elsewhere -#endif // SIMDJSON_VISUAL_STUDIO - -#if SIMDJSON_CLANG_VISUAL_STUDIO -/** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - * e.g., if __AVX2__ is set... in turn, we normally set these - * macros by compiling against the corresponding architecture - * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole - * software with these advanced instructions. In simdjson, we - * want to compile the whole program for a generic target, - * and only target our specific kernels. As a workaround, - * we directly include the needed headers. These headers would - * normally guard against such usage, but we carefully included - * (or ) before, so the headers - * are fooled. - */ -#include // for _blsr_u64 -#include // for __lzcnt64 -#include // for most things (AVX2, AVX512, _popcnt64) -#include -#include -#include -#include -#include // for _mm_clmulepi64_si128 -// Important: we need the AVX-512 headers: -#include -#include -#include -#include -#include -#include -#include -// unfortunately, we may not get _blsr_u64, but, thankfully, clang -// has it as a macro. -#ifndef _blsr_u64 -// we roll our own -#define _blsr_u64(n) ((n - 1) & n) -#endif // _blsr_u64 -#endif // SIMDJSON_CLANG_VISUAL_STUDIO - -static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); +// Interface a dom parser implementation must fulfill +namespace simdjson { +namespace icelake { +namespace { -#endif // SIMDJSON_ICELAKE_INTRINSICS_H -/* end file simdjson/icelake/intrinsics.h */ +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); +simdjson_inline bool is_ascii(const simd8x64& input); -#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE -SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt,popcnt") -#endif +} // unnamed namespace +} // namespace icelake +} // namespace simdjson -/* including simdjson/icelake/bitmanipulation.h: #include "simdjson/icelake/bitmanipulation.h" */ -/* begin file simdjson/icelake/bitmanipulation.h */ -#ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H -#define SIMDJSON_ICELAKE_BITMANIPULATION_H +#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file generic/dom_parser_implementation.h for icelake */ +/* including generic/json_character_block.h for icelake: #include */ +/* begin file generic/json_character_block.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace { -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -SIMDJSON_NO_SANITIZE_UNDEFINED -// This function can be used safely even if not all bytes have been -// initialized. -// See issue https://github.com/simdjson/simdjson/issues/1965 -SIMDJSON_NO_SANITIZE_MEMORY -simdjson_inline int trailing_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - return (int)_tzcnt_u64(input_num); -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - //////// - // You might expect the next line to be equivalent to - // return (int)_tzcnt_u64(input_num); - // but the generated code differs and might be less efficient? - //////// - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO -} - -/* result might be undefined when input_num is zero */ -simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return _blsr_u64(input_num); -} - -/* result might be undefined when input_num is zero */ -simdjson_inline int leading_zeroes(uint64_t input_num) { - return int(_lzcnt_u64(input_num)); -} +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); -#if SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows - return __popcnt64(input_num);// Visual Studio wants two underscores -} -#else -simdjson_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); -} -#endif + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } -simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); -#else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); -#endif -} + uint64_t _whitespace; + uint64_t _op; +}; } // unnamed namespace } // namespace icelake } // namespace simdjson -#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H -/* end file simdjson/icelake/bitmanipulation.h */ -/* including simdjson/icelake/bitmask.h: #include "simdjson/icelake/bitmask.h" */ -/* begin file simdjson/icelake/bitmask.h */ -#ifndef SIMDJSON_ICELAKE_BITMASK_H -#define SIMDJSON_ICELAKE_BITMASK_H +#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +/* end file generic/json_character_block.h for icelake */ +/* end file generic/amalgamated.h for icelake */ +/* including generic/stage1/amalgamated.h for icelake: #include */ +/* begin file generic/stage1/amalgamated.h for icelake */ +// Stuff other things depend on +/* including generic/stage1/base.h for icelake: #include */ +/* begin file generic/stage1/base.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace { +namespace stage1 { -// -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. -// -// For example, prefix_xor(00100100) == 00011100 -// -simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { - // There should be no such thing with a processor supporting avx2 - // but not clmul. - __m128i all_ones = _mm_set1_epi8('\xFF'); - __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); - return _mm_cvtsi128_si64(result); -} +class bit_indexer; +template +struct buf_block_reader; +struct json_block; +class json_minifier; +class json_scanner; +struct json_string_block; +class json_string_scanner; +class json_structural_indexer; + +} // namespace stage1 + +namespace utf8_validation { +struct utf8_checker; +} // namespace utf8_validation + +using utf8_validation::utf8_checker; } // unnamed namespace } // namespace icelake } // namespace simdjson -#endif // SIMDJSON_ICELAKE_BITMASK_H -/* end file simdjson/icelake/bitmask.h */ -/* including simdjson/icelake/simd.h: #include "simdjson/icelake/simd.h" */ -/* begin file simdjson/icelake/simd.h */ -#ifndef SIMDJSON_ICELAKE_SIMD_H -#define SIMDJSON_ICELAKE_SIMD_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H +/* end file generic/stage1/base.h for icelake */ +/* including generic/stage1/buf_block_reader.h for icelake: #include */ +/* begin file generic/stage1/buf_block_reader.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#if defined(__GNUC__) && !defined(__clang__) -#if __GNUC__ == 8 -#define SIMDJSON_GCC8 1 -#endif // __GNUC__ == 8 -#endif // defined(__GNUC__) && !defined(__clang__) - -#if SIMDJSON_GCC8 -/** - * GCC 8 fails to provide _mm512_set_epi8. We roll our own. - */ -inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { - return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), - uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), - uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), - uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), - uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), - uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), - uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), - uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); -} -#endif // SIMDJSON_GCC8 - - +#include namespace simdjson { namespace icelake { namespace { -namespace simd { +namespace stage1 { - // Forward-declared so they can be used by splat and friends. - template - struct base { - __m512i value; +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; - // Zero constructor - simdjson_inline base() : value{__m512i()} {} +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} - // Conversion from SIMD register - simdjson_inline base(const __m512i _value) : value(_value) {} +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} - // Conversion to SIMD register - simdjson_inline operator const __m512i&() const { return this->value; } - simdjson_inline operator __m512i&() { return this->value; } +simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] <= ' ') { buf[i] = '_'; } + if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} - // Bit operations - simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } - simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } - simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } - simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } - simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } - }; +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} - // Forward-declared so they can be used by splat and friends. - template - struct simd8; +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} - template> - struct base8: base> { - typedef uint32_t bitmask_t; - typedef uint64_t bitmask2_t; +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } - simdjson_inline base8() : base>() {} - simdjson_inline base8(const __m512i _value) : base>(_value) {} +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} - friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { - return _mm512_cmpeq_epi8_mask(lhs, rhs); - } +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} - static const int SIZE = sizeof(base::value); +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} - template - simdjson_inline simd8 prev(const simd8 prev_chunk) const { - // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) - constexpr int shift = 16 - N; - return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); - } - }; - - // SIMD byte mask type (returned by things like eq and gt) - template<> - struct simd8: base8 { - static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m512i _value) : base8(_value) {} - // Splat constructor - simdjson_inline simd8(bool _value) : base8(splat(_value)) {} - simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } - simdjson_inline simd8 operator~() const { return *this ^ true; } - }; +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson - template - struct base8_numeric: base8 { - static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } - static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } - static simdjson_inline simd8 load(const T values[64]) { - return _mm512_loadu_si512(reinterpret_cast(values)); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_inline simd8 repeat_16( - T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +/* end file generic/stage1/buf_block_reader.h for icelake */ +/* including generic/stage1/json_escape_scanner.h for icelake: #include */ +/* begin file generic/stage1/json_escape_scanner.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H - simdjson_inline base8_numeric() : base8() {} - simdjson_inline base8_numeric(const __m512i _value) : base8(_value) {} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // Store to array - simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { - // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } - simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } +/** + * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). + */ +struct json_escape_scanner { + /** The actual escape characters (the backslashes themselves). */ + uint64_t next_is_escaped = 0ULL; - // Override to distinguish from bool version - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + struct escaped_and_escape { + /** + * Mask of escaped characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 0100100010100101000 + * n \ \ n \ \ + * ``` + */ + uint64_t escaped; + /** + * Mask of escape characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 1001000101001010001 + * \ \ \ \ \ \ \ + * ``` + */ + uint64_t escape; + }; - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) - template - simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm512_shuffle_epi8(lookup_table, *this); - } + /** + * Get a mask of both escape and escaped characters (the characters following a backslash). + * + * @param potential_escape A mask of the character that can escape others (but could be + * escaped itself). e.g. block.eq('\\') + */ + simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). - // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes - // get written. - // Design consideration: it seems like a function with the - // signature simd8 compress(uint32_t mask) would be - // sensible, but the AVX ISA makes this kind of approach difficult. - template - simdjson_inline void compress(uint64_t mask, L * output) const { - _mm512_mask_compressstoreu_epi8 (output,~mask,*this); - } +#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT + if (!backslash) { return {next_escaped_without_backslashes(), 0}; } +#endif - template - simdjson_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, replace15 - )); - } - }; + // | | Mask (shows characters instead of 1's) | Depth | Instructions | + // |--------------------------------|----------------------------------------|-------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | + // | | ` even odd even odd odd` | | | + // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) + // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) + // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) + // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () + // (*) this is not needed until the next iteration + uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); + uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); + uint64_t escape = escape_and_terminal_code & backslash; + this->next_is_escaped = escape >> 63; + return {escaped, escape}; + } - // Signed bytes - template<> - struct simd8 : base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, - int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, - int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, - int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, - int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, - int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, - int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 - ) : simd8(_mm512_set_epi8( - v63, v62, v61, v60, v59, v58, v57, v56, - v55, v54, v53, v52, v51, v50, v49, v48, - v47, v46, v45, v44, v43, v42, v41, v40, - v39, v38, v37, v36, v35, v34, v33, v32, - v31, v30, v29, v28, v27, v26, v25, v24, - v23, v22, v21, v20, v19, v18, v17, v16, - v15, v14, v13, v12, v11, v10, v9, v8, - v7, v6, v5, v4, v3, v2, v1, v0 - )) {} +private: + static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } + simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { + uint64_t escaped = this->next_is_escaped; + this->next_is_escaped = 0; + return escaped; + } - // Order-sensitive comparisons - simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } + /** + * Returns a mask of the next escape characters (masking out escaped backslashes), along with + * any non-backslash escape codes. + * + * \n \\n \\\n \\\\n returns: + * \n \ \ \n \ \ + * 11 100 1011 10100 + * + * You are expected to mask out the first bit yourself if the previous block had a trailing + * escape. + * + * & the result with potential_escape to get just the escape characters. + * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. + */ + static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { + // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: + // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be + // inverted (\\\ would be 010 instead of 101). + // + // ``` + // string: | ____\\\\_\\\\_____ | + // maybe_escaped | ODD | \ \ \ \ | + // even-aligned ^^^ ^^^^ odd-aligned + // ``` + // + // Taking that into account, our basic strategy is: + // + // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for + // odd-aligned runs. + // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the + // odd bits in odd-aligned runs. + // 3. & with backslash to clean up any stray bits. + // runs are set to 0, and then XORing with "odd": + // + // | | Mask (shows characters instead of 1's) | Instructions | + // |--------------------------------|----------------------------------------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | + // | | ` even odd even odd odd` | + // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) + // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) + // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) + // - simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } - simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } - }; + // Escaped characters are characters following an escape. + uint64_t maybe_escaped = potential_escape << 1; - // Unsigned bytes - template<> - struct simd8: base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, - uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, - uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, - uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, - uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, - uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, - uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 - ) : simd8(_mm512_set_epi8( - v63, v62, v61, v60, v59, v58, v57, v56, - v55, v54, v53, v52, v51, v50, v49, v48, - v47, v46, v45, v44, v43, v42, v41, v40, - v39, v38, v37, v36, v35, v34, v33, v32, - v31, v30, v29, v28, v27, v26, v25, v24, - v23, v22, v21, v20, v19, v18, v17, v16, - v15, v14, v13, v12, v11, v10, v9, v8, - v7, v6, v5, v4, v3, v2, v1, v0 - )) {} + // To distinguish odd from even escape sequences, therefore, we turn on any *starting* + // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) + // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. + // - Odd runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. + // - All other odd bytes are 1, and even bytes are 0. + uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; + uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } + // Now we flip all odd bytes back with xor. This: + // - Makes odd runs of backslashes go from 0000 to 1010 + // - Makes even runs of backslashes go from 1111 to 1010 + // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) + // - Resets all other bytes to 0 + return even_series_codes_and_odd_bits ^ ODD_BITS; + } +}; - // Saturated math - simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } - simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson - // Order-specific operations - simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_escape_scanner.h for icelake */ +/* including generic/stage1/json_string_scanner.h for icelake: #include */ +/* begin file generic/stage1/json_string_scanner.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H - // Bit-specific operations - simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } - simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } - simdjson_inline bool bits_not_set_anywhere() const { - return !_mm512_test_epi8_mask(*this, *this); - } - simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } - simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } - template - simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } - template - simdjson_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } - // Get one of the bits and make a bitmask out of it. - // e.g. value.get_bit<7>() gets the high bit - template - simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } - }; +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { - template - struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : + _escaped(escaped), _quote(quote), _in_string(in_string) {} - simd8x64(const simd8x64& o) = delete; // no copy allowed - simd8x64& operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} - simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} - simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-escaped ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; - simdjson_inline uint64_t compress(uint64_t mask, T * output) const { - this->chunks[0].compress(mask, output); - return 64 - count_ones(mask); - } +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); - simdjson_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr+sizeof(simd8)*0); - } +private: + // Scans for escape characters + json_escape_scanner escape_scanner{}; + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; +}; - simdjson_inline simd8 reduce_or() const { - return this->chunks[0]; - } +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. +// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = escape_scanner.next(backslash).escaped; + const uint64_t quote = in.eq('"') & ~escaped; - simdjson_inline simd8x64 bit_or(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] | mask - ); - } + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; - simdjson_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return this->chunks[0] == mask; - } + // + // Check if we're still in a string at the end of the box so the next block will know + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); - simdjson_inline uint64_t eq(const simd8x64 &other) const { - return this->chunks[0] == other.chunks[0]; - } + // Use ^ to turn the beginning quote off, and the end quote on. - simdjson_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return this->chunks[0] <= mask; - } - }; // struct simd8x64 + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block(escaped, quote, in_string); +} -} // namespace simd +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} +} // namespace stage1 } // unnamed namespace } // namespace icelake } // namespace simdjson -#endif // SIMDJSON_ICELAKE_SIMD_H -/* end file simdjson/icelake/simd.h */ -/* including simdjson/icelake/stringparsing_defs.h: #include "simdjson/icelake/stringparsing_defs.h" */ -/* begin file simdjson/icelake/stringparsing_defs.h */ -#ifndef SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H -#define SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_string_scanner.h for icelake */ +/* including generic/stage1/utf8_lookup4_algorithm.h for icelake: #include */ +/* begin file generic/stage1/utf8_lookup4_algorithm.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/simd.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace { +namespace utf8_validation { using namespace simd; -// Holds backslashes and quotes locations. -struct backslash_and_quote { -public: - static constexpr uint32_t BYTES_PROCESSED = 64; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - - simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } - simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } - - uint64_t bs_bits; - uint64_t quote_bits; -}; // struct backslash_and_quote - -simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // this can read up to 15 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); - simd8 v(src); - // store to dest unconditionally - we can overwrite the bits we don't like later - v.store(dst); - return { - static_cast(v == '\\'), // bs_bits - static_cast(v == '"'), // quote_bits - }; -} + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ -} // unnamed namespace -} // namespace icelake -} // namespace simdjson + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, -#endif // SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H -/* end file simdjson/icelake/stringparsing_defs.h */ -/* including simdjson/icelake/numberparsing_defs.h: #include "simdjson/icelake/numberparsing_defs.h" */ -/* begin file simdjson/icelake/numberparsing_defs.h */ -#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H -#define SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, -namespace simdjson { -namespace icelake { -namespace numberparsing { + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, -static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - // this actually computes *16* values so we are being wasteful. - const __m128i ascii0 = _mm_set1_epi8('0'); - const __m128i mul_1_10 = - _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); - const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); - const __m128i mul_1_10000 = - _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); - const __m128i input = _mm_sub_epi8( - _mm_loadu_si128(reinterpret_cast(chars)), ascii0); - const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); - const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); - const __m128i t3 = _mm_packus_epi32(t2, t2); - const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); - return _mm_cvtsi128_si32( - t4); // only captures the sum of the first 8 digits, drop the rest -} + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = must_be_2_3_continuation(prev2, prev3); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } -/** @private */ -simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { - internal::value128 answer; -#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; #else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); -#endif - return answer; -} - -} // namespace numberparsing -} // namespace icelake -} // namespace simdjson - -#define SIMDJSON_SWAR_NUMBER_PARSING 1 - -#endif // SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H -/* end file simdjson/icelake/numberparsing_defs.h */ -/* end file simdjson/icelake/begin.h */ -/* including simdjson/generic/amalgamated.h for icelake: #include "simdjson/generic/amalgamated.h" */ -/* begin file simdjson/generic/amalgamated.h for icelake */ -#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) -#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; #endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } -/* including simdjson/generic/base.h for icelake: #include "simdjson/generic/base.h" */ -/* begin file simdjson/generic/base.h for icelake */ -#ifndef SIMDJSON_GENERIC_BASE_H + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ -/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ -/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ -/* amalgamation skipped (editor-only): #else */ -/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ -/* amalgamation skipped (editor-only): #endif */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // + // Check whether the current bytes are valid UTF-8. + // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } -namespace simdjson { -namespace icelake { + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. + simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } -struct open_container; -class dom_parser_implementation; + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; + } -/** - * The type of a JSON number - */ -enum class number_type { - floating_point_number=1, /// a binary64 number - signed_integer, /// a signed integer that fits in a 64-bit word using two's complement - unsigned_integer /// a positive integer larger or equal to 1<<63 -}; + }; // struct utf8_checker +} // namespace utf8_validation +} // unnamed namespace } // namespace icelake } // namespace simdjson -#endif // SIMDJSON_GENERIC_BASE_H -/* end file simdjson/generic/base.h for icelake */ -/* including simdjson/generic/jsoncharutils.h for icelake: #include "simdjson/generic/jsoncharutils.h" */ -/* begin file simdjson/generic/jsoncharutils.h for icelake */ -#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +/* end file generic/stage1/utf8_lookup4_algorithm.h for icelake */ +/* including generic/stage1/json_scanner.h for icelake: #include */ +/* begin file generic/stage1/json_scanner.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace { -namespace jsoncharutils { +namespace stage1 { -// return non-zero if not a structural or whitespace char -// zero otherwise -simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace_negated[c]; -} +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. + */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} -simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace[c]; -} + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } -// returns a value with the high 16 bits set if not valid -// otherwise returns the conversion of the 4 hex digits at src into the bottom -// 16 bits of the 32-bit return register -// -// see -// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ -static inline uint32_t hex_to_u32_nocheck( - const uint8_t *src) { // strictly speaking, static inline is a C-ism - uint32_t v1 = internal::digit_to_val32[630 + src[0]]; - uint32_t v2 = internal::digit_to_val32[420 + src[1]]; - uint32_t v3 = internal::digit_to_val32[210 + src[2]]; - uint32_t v4 = internal::digit_to_val32[0 + src[3]]; - return v1 | v2 | v3 | v4; -} + // Helpers -// given a code point cp, writes to c -// the utf-8 code, outputting the length in -// bytes, if the length is zero, the code point -// is invalid -// -// This can possibly be made faster using pdep -// and clz and table lookups, but JSON documents -// have few escaped code points, and the following -// function looks cheap. -// -// Note: we assume that surrogates are treated separately -// -simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { - if (cp <= 0x7F) { - c[0] = uint8_t(cp); - return 1; // ascii + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); } - if (cp <= 0x7FF) { - c[0] = uint8_t((cp >> 6) + 192); - c[1] = uint8_t((cp & 63) + 128); - return 2; // universal plane - // Surrogates are treated elsewhere... - //} //else if (0xd800 <= cp && cp <= 0xdfff) { - // return 0; // surrogates // could put assert here - } else if (cp <= 0xFFFF) { - c[0] = uint8_t((cp >> 12) + 224); - c[1] = uint8_t(((cp >> 6) & 63) + 128); - c[2] = uint8_t((cp & 63) + 128); - return 3; - } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed - c[0] = uint8_t((cp >> 18) + 240); - c[1] = uint8_t(((cp >> 12) & 63) + 128); - c[2] = uint8_t(((cp >> 6) & 63) + 128); - c[3] = uint8_t((cp & 63) + 128); - return 4; + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. + */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; } - // will return 0 when the code point was too large. - return 0; // bad r -} - -#if SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; -} -#endif +}; -} // namespace jsoncharutils -} // unnamed namespace -} // namespace icelake -} // namespace simdjson +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); -#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H -/* end file simdjson/generic/jsoncharutils.h for icelake */ -/* including simdjson/generic/atomparsing.h for icelake: #include "simdjson/generic/atomparsing.h" */ -/* begin file simdjson/generic/atomparsing.h for icelake */ -#ifndef SIMDJSON_GENERIC_ATOMPARSING_H +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} -namespace simdjson { -namespace icelake { -namespace { -/// @private -namespace atomparsing { - -// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. -// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot -// be certain that the character pointer will be properly aligned. -// You might think that using memcpy makes this function expensive, but you'd be wrong. -// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); -// to the compile-time constant 1936482662. -simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } - - -// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. -// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. -simdjson_warn_unused -simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { - uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) - static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); - std::memcpy(&srcval, src, sizeof(uint32_t)); - return srcval ^ string_to_uint32(atom); +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. + characters, + follows_nonquote_scalar + ); } -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src) { - return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); } -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_true_atom(src); } - else if (len == 4) { return !str4ncmp(src, "true"); } - else { return false; } -} +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src) { - return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; -} +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +/* end file generic/stage1/json_scanner.h for icelake */ -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { - if (len > 5) { return is_valid_false_atom(src); } - else if (len == 5) { return !str4ncmp(src+1, "alse"); } - else { return false; } -} +// All other declarations +/* including generic/stage1/find_next_document_index.h for icelake: #include */ +/* begin file generic/stage1/find_next_document_index.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src) { - return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_null_atom(src); } - else if (len == 4) { return !str4ncmp(src, "null"); } - else { return false; } +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; } -} // namespace atomparsing +} // namespace stage1 } // unnamed namespace } // namespace icelake } // namespace simdjson -#endif // SIMDJSON_GENERIC_ATOMPARSING_H -/* end file simdjson/generic/atomparsing.h for icelake */ -/* including simdjson/generic/dom_parser_implementation.h for icelake: #include "simdjson/generic/dom_parser_implementation.h" */ -/* begin file simdjson/generic/dom_parser_implementation.h for icelake */ -#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for icelake */ +/* including generic/stage1/json_minifier.h for icelake: #include */ +/* begin file generic/stage1/json_minifier.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + namespace simdjson { namespace icelake { +namespace { +namespace stage1 { -// expectation: sizeof(open_container) = 64/8. -struct open_container { - uint32_t tape_index; // where, on the tape, does the scope ([,{) begins - uint32_t count; // how many elements in the scope -}; // struct open_container - -static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); - -class dom_parser_implementation final : public internal::dom_parser_implementation { +class json_minifier { public: - /** Tape location of each open { or [ */ - std::unique_ptr open_containers{}; - /** Whether each open container is a [ or { */ - std::unique_ptr is_array{}; - /** Buffer passed to stage 1 */ - const uint8_t *buf{}; - /** Length passed to stage 1 */ - size_t len{0}; - /** Document passed to stage 2 */ - dom::document *doc{}; - - inline dom_parser_implementation() noexcept; - inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; - inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; - simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; - simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; - simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; - simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; - inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; - inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; private: - simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); - + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; }; -} // namespace icelake -} // namespace simdjson - -namespace simdjson { -namespace icelake { +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} -inline dom_parser_implementation::dom_parser_implementation() noexcept = default; -inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; -inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} -// Leaving these here so they can be inlined if so desired -inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } - // Stage 1 index output - size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; - structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); - if (!structural_indexes) { _capacity = 0; return MEMALLOC; } - structural_indexes[0] = 0; - n_structural_indexes = 0; +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} - _capacity = capacity; - return SUCCESS; +template<> +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); } -inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - // Stage 2 stacks - open_containers.reset(new (std::nothrow) open_container[max_depth]); - is_array.reset(new (std::nothrow) bool[max_depth]); - if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); - _max_depth = max_depth; - return SUCCESS; + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); } +} // namespace stage1 +} // unnamed namespace } // namespace icelake } // namespace simdjson -#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H -/* end file simdjson/generic/dom_parser_implementation.h for icelake */ -/* including simdjson/generic/implementation_simdjson_result_base.h for icelake: #include "simdjson/generic/implementation_simdjson_result_base.h" */ -/* begin file simdjson/generic/implementation_simdjson_result_base.h for icelake */ -#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +/* end file generic/stage1/json_minifier.h for icelake */ +/* including generic/stage1/json_structural_indexer.h for icelake: #include */ +/* begin file generic/stage1/json_structural_indexer.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + namespace simdjson { namespace icelake { +namespace { +namespace stage1 { -// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair -// so we can avoid inlining errors -// TODO reconcile these! -/** - * The result of a simdjson operation that could fail. - * - * Gives the option of reading error codes, or throwing an exception by casting to the desired result. - * - * This is a base class for implementations that want to add functions to the result type for - * chaining. - * - * Override like: - * - * struct simdjson_result : public internal::implementation_simdjson_result_base { - * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} - * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} - * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} - * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} - * // Your extra methods here - * } - * - * Then any method returning simdjson_result will be chainable with your methods. - */ -template -struct implementation_simdjson_result_base { - - /** - * Create a new empty result with error = UNINITIALIZED. - */ - simdjson_inline implementation_simdjson_result_base() noexcept = default; +class bit_indexer { +public: + uint32_t *tail; - /** - * Create a new error result. - */ - simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} +#if SIMDJSON_PREFER_REVERSE_BITS /** - * Create a new successful result. - */ - simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; - + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. + * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + simdjson_inline void write_index(uint32_t idx, uint64_t& rev_bits, int i) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } +#else /** - * Create a new result with both things (use if you don't want to branch when creating the result). - */ - simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ - /** - * Move the value and the error to the provided variables. - * - * @param value The variable to assign the value to. May not be set if there is an error. - * @param error The variable to assign the error to. Set to SUCCESS if there is no error. - */ - simdjson_inline void tie(T &value, error_code &error) && noexcept; + simdjson_inline void write_index(uint32_t idx, uint64_t& bits, int i) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } +#endif // SIMDJSON_PREFER_REVERSE_BITS - /** - * Move the value to the provided variable. - * - * @param value The variable to assign the value to. May not be set if there is an error. - */ - simdjson_inline error_code get(T &value) && noexcept; + template + simdjson_inline int write_indexes(uint32_t idx, uint64_t& bits) { + write_index(idx, bits, START); + SIMDJSON_IF_CONSTEXPR (N > 1) { + write_indexes<(N-1>0?START+1:START), (N-1>=0?N-1:1)>(idx, bits); + } + return START+N; + } - /** - * The error. - */ - simdjson_inline error_code error() const noexcept; + template + simdjson_inline int write_indexes_stepped(uint32_t idx, uint64_t& bits, int cnt) { + write_indexes(idx, bits); + SIMDJSON_IF_CONSTEXPR ((START+STEP) < END) { + if (simdjson_unlikely((START+STEP) < cnt)) { + write_indexes_stepped<(START+STEP(idx, bits, cnt); + } + } + return ((END-START) % STEP) == 0 ? END : (END-START) - ((END-START) % STEP) + STEP; + } -#if SIMDJSON_EXCEPTIONS + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it + // will provide its own version of the code. +#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; - /** - * Get the result value. - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T& value() & noexcept(false); + int cnt = static_cast(count_ones(bits)); - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T&& value() && noexcept(false); +#if SIMDJSON_PREFER_REVERSE_BITS + bits = reverse_bits(bits); +#endif +#ifdef SIMDJSON_STRUCTURAL_INDEXER_STEP + static constexpr const int STEP = SIMDJSON_STRUCTURAL_INDEXER_STEP; +#else + static constexpr const int STEP = 4; +#endif + static constexpr const int STEP_UNTIL = 24; - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T&& take_value() && noexcept(false); + write_indexes_stepped<0, STEP_UNTIL, STEP>(idx, bits, cnt); + SIMDJSON_IF_CONSTEXPR (STEP_UNTIL < 64) { + if (simdjson_unlikely(STEP_UNTIL < cnt)) { + for (int i=STEP_UNTIL; itail += cnt; + } +#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +}; +class json_structural_indexer { +public: /** - * Cast to the value (will throw on error). + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. * - * @throw simdjson_error if there was an error. + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. */ - simdjson_inline operator T&&() && noexcept(false); + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; -#endif // SIMDJSON_EXCEPTIONS +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline const T& value_unsafe() const& noexcept; - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline T& value_unsafe() & noexcept; - /** - * Take the result value (move it). This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline T&& value_unsafe() && noexcept; -protected: - /** users should never directly access first and second. **/ - T first{}; /** Users should never directly access 'first'. **/ - error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ -}; // struct implementation_simdjson_result_base +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 does not use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) + uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION + checker.check_next_input(in); +#endif + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? + ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On-Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. + * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} +} // namespace stage1 +} // unnamed namespace } // namespace icelake } // namespace simdjson -#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H -/* end file simdjson/generic/implementation_simdjson_result_base.h for icelake */ -/* including simdjson/generic/numberparsing.h for icelake: #include "simdjson/generic/numberparsing.h" */ -/* begin file simdjson/generic/numberparsing.h for icelake */ -#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H +// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. +#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +/* end file generic/stage1/json_structural_indexer.h for icelake */ +/* including generic/stage1/utf8_validator.h for icelake: #include */ +/* begin file generic/stage1/utf8_validator.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include -#include -#include - namespace simdjson { namespace icelake { -namespace numberparsing { +namespace { +namespace stage1 { -#ifdef JSON_TEST_NUMBERS -#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) -#else -#define INVALID_NUMBER(SRC) (NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) -#endif +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +/* end file generic/stage1/utf8_validator.h for icelake */ +/* end file generic/stage1/amalgamated.h for icelake */ +/* including generic/stage2/amalgamated.h for icelake: #include */ +/* begin file generic/stage2/amalgamated.h for icelake */ +// Stuff other things depend on +/* including generic/stage2/base.h for icelake: #include */ +/* begin file generic/stage2/base.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +namespace simdjson { +namespace icelake { namespace { +namespace stage2 { -// Convert a mantissa, an exponent and a sign bit into an ieee64 double. -// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). -// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. -simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { - double d; - mantissa &= ~(1ULL << 52); - mantissa |= real_exponent << 52; - mantissa |= ((static_cast(negative)) << 63); - std::memcpy(&d, &mantissa, sizeof(d)); - return d; -} +class json_iterator; +class structural_iterator; +struct tape_builder; +struct tape_writer; -// Attempts to compute i * 10^(power) exactly; and if "negative" is -// true, negate the result. -// This function will only work in some cases, when it does not work, success is -// set to false. This should work *most of the time* (like 99% of the time). -// We assume that power is in the [smallest_power, -// largest_power] interval: the caller is responsible for this check. -simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { - // we start with a fast path - // It was described in - // Clinger WD. How to read floating point numbers accurately. - // ACM SIGPLAN Notices. 1990 -#ifndef FLT_EVAL_METHOD -#error "FLT_EVAL_METHOD should be defined, please include cfloat." -#endif -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - // We cannot be certain that x/y is rounded to nearest. - if (0 <= power && power <= 22 && i <= 9007199254740991) -#else - if (-22 <= power && power <= 22 && i <= 9007199254740991) -#endif - { - // convert the integer into a double. This is lossless since - // 0 <= i <= 2^53 - 1. - d = double(i); - // - // The general idea is as follows. - // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then - // 1) Both s and p can be represented exactly as 64-bit floating-point - // values - // (binary64). - // 2) Because s and p can be represented exactly as floating-point values, - // then s * p - // and s / p will produce correctly rounded values. - // - if (power < 0) { - d = d / simdjson::internal::power_of_ten[-power]; - } else { - d = d * simdjson::internal::power_of_ten[power]; - } - if (negative) { - d = -d; - } - return true; - } - // When 22 < power && power < 22 + 16, we could - // hope for another, secondary fast path. It was - // described by David M. Gay in "Correctly rounded - // binary-decimal and decimal-binary conversions." (1990) - // If you need to compute i * 10^(22 + x) for x < 16, - // first compute i * 10^x, if you know that result is exact - // (e.g., when i * 10^x < 2^53), - // then you can still proceed and do (i * 10^x) * 10^22. - // Is this worth your time? - // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) - // for this second fast path to work. - // If you you have 22 < power *and* power < 22 + 16, and then you - // optimistically compute "i * 10^(x-22)", there is still a chance that you - // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of - // this optimization maybe less common than we would like. Source: - // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ - // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html +} // namespace stage2 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson - // The fast path has now failed, so we are failing back on the slower path. +#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +/* end file generic/stage2/base.h for icelake */ +/* including generic/stage2/tape_writer.h for icelake: #include */ +/* begin file generic/stage2/tape_writer.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H - // In the slow path, we need to adjust i so that it is > 1<<63 which is always - // possible, except if i == 0, so we handle i == 0 separately. - if(i == 0) { - d = negative ? -0.0 : 0.0; - return true; - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include - // The exponent is 1024 + 63 + power - // + floor(log(5**power)/log(2)). - // The 1024 comes from the ieee64 standard. - // The 63 comes from the fact that we use a 64-bit word. - // - // Computing floor(log(5**power)/log(2)) could be - // slow. Instead we use a fast function. - // - // For power in (-400,350), we have that - // (((152170 + 65536) * power ) >> 16); - // is equal to - // floor(log(5**power)/log(2)) + power when power >= 0 - // and it is equal to - // ceil(log(5**-power)/log(2)) + power when power < 0 - // - // The 65536 is (1<<16) and corresponds to - // (65536 * power) >> 16 ---> power - // - // ((152170 * power ) >> 16) is equal to - // floor(log(5**power)/log(2)) - // - // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). - // The 1<<16 value is a power of two; we could use a - // larger power of 2 if we wanted to. - // - int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; +namespace simdjson { +namespace icelake { +namespace { +namespace stage2 { +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; - // We want the most significant bit of i to be 1. Shift if needed. - int lz = leading_zeroes(i); - i <<= lz; + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; - // We are going to need to do some 64-bit arithmetic to get a precise product. - // We use a table lookup approach. - // It is safe because - // power >= smallest_power - // and power <= largest_power - // We recover the mantissa of the power, it has a leading 1. It is always - // rounded down. - // - // We want the most significant 64 bits of the product. We know - // this will be non-zero because the most significant bit of i is - // 1. - const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); - // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); - // Both i and power_of_five_128[index] have their most significant bit set to 1 which - // implies that the either the most or the second most significant bit of the product - // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason about the product: there - // is 0 or 1 leading zero in the product. + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; - // Unless the least significant 9 bits of the high (64-bit) part of the full - // product are all 1s, then we know that the most significant 55 bits are - // exact and no further work is needed. Having 55 bits is necessary because - // we need 53 bits for the mantissa but we have to have one rounding bit and - // we can waste a bit if the most significant bit of the product is zero. - if((firstproduct.high & 0x1FF) == 0x1FF) { - // We want to compute i * 5^q, but only care about the top 55 bits at most. - // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing - // the full computation is wasteful. So we do what is called a "truncated - // multiplication". - // We take the most significant 64-bits, and we put them in - // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q - // to the desired approximation using one multiplication. Sometimes it does not suffice. - // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and - // then we get a better approximation to i * 5^q. - // - // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat - // more complicated. - // - // There is an extra layer of complexity in that we need more than 55 bits of - // accuracy in the round-to-even scenario. - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); - firstproduct.low += secondproduct.high; - if(secondproduct.high > firstproduct.low) { firstproduct.high++; } - // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without - // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product - // is sufficiently accurate, and more computation is not needed. - } - uint64_t lower = firstproduct.low; - uint64_t upper = firstproduct.high; - // The final mantissa should be 53 bits with a leading 1. - // We shift it so that it occupies 54 bits with a leading 1. - /////// - uint64_t upperbit = upper >> 63; - uint64_t mantissa = upper >> (upperbit + 9); - lz += int(1 ^ upperbit); - - // Here we have mantissa < (1<<54). - int64_t real_exponent = exponent - lz; - if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 - if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = negative ? -0.0 : 0.0; - return true; - } - // next line is safe because -real_exponent + 1 < 0 - mantissa >>= -real_exponent + 1; - // Thankfully, we can't have both "round-to-even" and subnormals because - // "round-to-even" only occurs for powers close to 0. - mantissa += (mantissa & 1); // round up - mantissa >>= 1; - // There is a weird scenario where we don't have a subnormal but just. - // Suppose we start with 2.2250738585072013e-308, we end up - // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal - // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round - // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer - // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. - real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; - d = to_double(mantissa, real_exponent, negative); - return true; - } - // We have to round to even. The "to even" part - // is only a problem when we are right in between two floats - // which we guard against. - // If we have lots of trailing zeros, we may fall right between two - // floating-point values. - // - // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] - // times a power of two. That is, it is right between a number with binary significand - // m and another number with binary significand m+1; and it must be the case - // that it cannot be represented by a float itself. - // - // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. - // Recall that 10^q = 5^q * 2^q. - // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that - // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. - // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so - // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have - // 2^{53} x 5^{-q} < 2^{64}. - // Hence we have 5^{-q} < 2^{11}$ or q>= -4. - // - // We require lower <= 1 and not lower == 0 because we could not prove that - // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. - if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { - if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { - mantissa &= ~1; // flip it so that we do not round up - } - } + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; - mantissa += mantissa & 1; - mantissa >>= 1; + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; - // Here we have mantissa < (1<<53), unless there was an overflow - if (mantissa >= (1ULL << 53)) { - ////////// - // This will happen when parsing values such as 7.2057594037927933e+16 - //////// - mantissa = (1ULL << 52); - real_exponent++; - } - mantissa &= ~(1ULL << 52); - // we have to check that real_exponent is in range, otherwise we bail out - if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinite value!!! We could actually throw an error here if we could. - return false; - } - d = to_double(mantissa, real_exponent, negative); - return true; -} + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; -// We call a fallback floating-point parser that might be slow. Note -// it will accept JSON numbers, but the JSON spec. is more restrictive so -// before you call parse_float_fallback, you need to have validated the input -// string with the JSON grammar. -// It will return an error (false) if the parsed number is infinite. -// The string parsing itself always succeeds. We know that there is at least -// one digit. -static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); - // We do not accept infinite values. + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; -static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); - // We do not accept infinite values. +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); } -// check quickly whether the next 8 chars are made of digits -// at a glance, it looks better than Mula's -// http://0x80.pl/articles/swar-digits-validate.html -simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { - uint64_t val; - // this can read up to 7 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); - std::memcpy(&val, chars, 8); - // a branchy method might be faster: - // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) - // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == - // 0x3030303030303030); - return (((val & 0xF0F0F0F0F0F0F0F0) | - (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == - 0x3333333333333333); +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; } -template -SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_inline bool parse_digit(const uint8_t c, I &i) { - const uint8_t digit = static_cast(c - '0'); - if (digit > 9) { - return false; - } - // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication - i = 10 * i + digit; // might overflow, we will handle the overflow later - return true; +/** Write a double value to tape. */ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); } -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. - const uint8_t *const first_after_period = p; - -#ifdef SIMDJSON_SWAR_NUMBER_PARSING -#if SIMDJSON_SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif // SIMDJSON_SWAR_NUMBER_PARSING -#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING - // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) - if (parse_digit(*p, i)) { ++p; } - while (parse_digit(*p, i)) { p++; } - exponent = first_after_period - p; - // Decimal without digits (123.) is illegal - if (exponent == 0) { - return INVALID_NUMBER(src); - } - return SUCCESS; +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; } -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { - // Exp Sign: -123.456e[-]78 - bool neg_exp = ('-' == *p); - if (neg_exp || '+' == *p) { p++; } // Skip + as well - - // Exponent: -123.456e-[78] - auto start_exp = p; - int64_t exp_number = 0; - while (parse_digit(*p, exp_number)) { ++p; } - // It is possible for parse_digit to overflow. - // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. - // Thus we *must* check for possible overflow before we negate exp_number. +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} - // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into - // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may - // not oblige and may, in fact, generate two distinct paths in any case. It might be - // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off - // instructions for a simdjson_likely branch, an unconclusive gain. +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} - // If there were no digits, it's an error. - if (simdjson_unlikely(p == start_exp)) { - return INVALID_NUMBER(src); - } - // We have a valid positive exponent in exp_number at this point, except that - // it may have overflowed. +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} - // If there were more than 18 digits, we may have overflowed the integer. We have to do - // something!!!! - if (simdjson_unlikely(p > start_exp+18)) { - // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow - while (*start_exp == '0') { start_exp++; } - // 19 digits could overflow int64_t and is kind of absurd anyway. We don't - // support exponents smaller than -999,999,999,999,999,999 and bigger - // than 999,999,999,999,999,999. - // We can truncate. - // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before - // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could - // truncate at 324. - // Note that there is no reason to fail per se at this point in time. - // E.g., 0e999999999999999999999 is a fine number. - if (p > start_exp+18) { exp_number = 999999999999999999; } - } - // At this point, we know that exp_number is a sane, positive, signed integer. - // It is <= 999,999,999,999,999,999. As long as 'exponent' is in - // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' - // is bounded in magnitude by the size of the JSON input, we are fine in this universe. - // To sum it up: the next line should never overflow. - exponent += (neg_exp ? -exp_number : exp_number); - return SUCCESS; +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; } -simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { ++start; } - // we over-decrement by one when there is a '.' - return digit_count - size_t(start - start_digits); +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); } +} // namespace stage2 } // unnamed namespace +} // namespace icelake +} // namespace simdjson -/** @private */ -static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { - if (parse_float_fallback(src, answer)) { - return SUCCESS; - } - return INVALID_NUMBER(src); -} - -/** @private */ -template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon in practice. - // - // 9999999999999999999 < 2**64 so we can accommodate 19 digits. - // If we have a decimal separator, then digit_count - 1 is the number of digits, but we - // may not have a decimal separator! - if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { - // Ok, chances are good that we had an overflow! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer - // reference to it, it would force it to be stored in memory, preventing the compiler from - // picking it apart and putting into registers. i.e. if we pass it as reference, - // it gets slow. - double d; - error_code error = slow_float_parsing(src, &d); - writer.append_double(d); - return error; - } - // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other - // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 - // To future reader: we'd love if someone found a better way, or at least could explain this result! - if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { - // - // Important: smallest_power is such that it leads to a zero value. - // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero - // so something x 10^-343 goes to zero, but not so with something x 10^-342. - static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); - // - if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero - WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); - return SUCCESS; - } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. - return INVALID_NUMBER(src); - } - } - double d; - if (!compute_float_64(exponent, i, negative, d)) { - // we are almost never going to get here. - if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } - } - WRITE_DOUBLE(d, src, writer); - return SUCCESS; -} +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for icelake */ +/* including generic/stage2/logger.h for icelake: #include */ +/* begin file generic/stage2/logger.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H -// for performance analysis, it is sometimes useful to skip parsing -#ifdef SIMDJSON_SKIPNUMBERPARSING +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { - writer.append_s64(0); // always write zero - return SUCCESS; // always succeeds -} +#include -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } -#else -// parse the number at src -// define JSON_TEST_NUMBERS for unit testing -// -// It is assumed that the number is followed by a structural ({,},],[) character -// or a white space character. If that is not the case (e.g., when the JSON -// document is made of a single number), then it is necessary to copy the -// content and append a space before calling this function. -// -// Our objective is accurate parsing (ULP of 0) at high speed. -template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace icelake { +namespace { +namespace logger { - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + static int log_depth; // Not threadsafe. Log only. - // - // Handle floats if there is a . or e (or both) - // - int64_t exponent = 0; - bool is_float = false; - if ('.' == *p) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); - digit_count = int(p - start_digits); // used later to guard against overflows - } - if (('e' == *p) || ('E' == *p)) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_exponent(src, p, exponent) ); - } - if (is_float) { - const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); - SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); - if (dirty_end) { return INVALID_NUMBER(src); } - return SUCCESS; + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } } - // The longest negative 64-bit number is 19 digits. - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - size_t longest_digit_count = negative ? 19 : 20; - if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } - WRITE_INTEGER(~i+1, src, writer); - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } } - // Write unsigned if it doesn't fit in a signed integer. - if (i > uint64_t(INT64_MAX)) { - WRITE_UNSIGNED(i, src, writer); - } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } } - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; -} -// Inlineable functions -namespace { + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i */ +/* begin file generic/stage2/json_iterator.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage2 { + +class json_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + uint32_t depth{0}; + + /** + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_end() - at the start `]` of a non-empty object. + * - visit_object_start() - at the end `]` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. + * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. + */ + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { - const uint8_t *p = src; // - // Parse the integer part. + // Start the document // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // + // Read first value + // + { + auto value = advance(); - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } } + goto document_end; - return i; -} +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } -// Parse any number from 0 to 18,446,744,073,709,551,615 -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } } - return i; -} +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { - const uint8_t *p = src + 1; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - // Note: we use src[1] and not src[0] because src[0] is the quote character in this - // instance. - if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; } - return i; + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + // Use the fact that most scalars are going to be either strings or numbers. + if(*value == '"') { + return visitor.visit_string(*this, value); + } else if (((*value - '0') < 10) || (*value == '-')) { + return visitor.visit_number(*this, value); + } + // true, false, null are uncommon. + switch (*value) { + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for icelake */ +/* including generic/stage2/stringparsing.h for icelake: #include */ +/* begin file generic/stage2/stringparsing.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace icelake { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_ptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for icelake */ +/* including generic/stage2/structural_iterator.h for icelake: #include */ +/* begin file generic/stage2/structural_iterator.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage2 { + +class structural_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + + // Start a structural + simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { + } + // Get the buffer position of the current structural character + simdjson_inline const uint8_t* current() { + return &buf[*(next_structural-1)]; + } + // Get the current structural character + simdjson_inline char current_char() { + return buf[*(next_structural-1)]; + } + // Get the next structural character without advancing + simdjson_inline char peek_next_char() { + return buf[*next_structural]; + } + simdjson_inline const uint8_t* peek() { + return &buf[*next_structural]; + } + simdjson_inline const uint8_t* advance() { + return &buf[*(next_structural++)]; + } + simdjson_inline char advance_char() { + return buf[*(next_structural++)]; + } + simdjson_inline size_t remaining_len() { + return dom_parser.len - *(next_structural-1); + } + + simdjson_inline bool at_end() { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; + } + simdjson_inline bool at_beginning() { + return next_structural == dom_parser.structural_indexes.get(); + } +}; + +} // namespace stage2 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +/* end file generic/stage2/structural_iterator.h for icelake */ +/* including generic/stage2/tape_builder.h for icelake: #include */ +/* begin file generic/stage2/tape_builder.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace icelake { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. + * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. + */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. */ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +/* end file generic/stage2/tape_builder.h for icelake */ +/* end file generic/stage2/amalgamated.h for icelake */ + +#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +// +// Stage 1 +// + +namespace simdjson { +namespace icelake { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +namespace { + +using namespace simd; + +// This identifies structural characters (comma, colon, braces, brackets), +// and ASCII white-space ('\r','\n','\t',' '). +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why + // we can't use the generic lookup_16. + const auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); + + // The 6 operators (:,[]{}) have these values: + // + // , 2C + // : 3A + // [ 5B + // { 7B + // ] 5D + // } 7D + // + // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. + // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then + // match it (against | 0x20). + // + // To prevent recognizing other characters, everything else gets compared with 0, which cannot + // match due to the | 0x20. + // + // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , + // and :. This gets caught in stage 2, which checks the actual character to ensure the right + // operators are in the right places. + const auto op_table = simd8::repeat_16( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B + ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D + ); + + // We compute whitespace and op separately. If later code only uses one or the + // other, given the fact that all functions are aggressively inlined, we can + // hope that useless computations will be omitted. This is namely case when + // minifying (we only need whitespace). + + const uint64_t whitespace = in.eq({ + _mm512_shuffle_epi8(whitespace_table, in.chunks[0]) + }); + // Turn [ and ] into { and } + const simd8x64 curlified{ + in.chunks[0] | 0x20 + }; + const uint64_t op = curlified.eq({ + _mm512_shuffle_epi8(op_table, in.chunks[0]) + }); + + return { whitespace, op }; +} + +simdjson_inline bool is_ascii(const simd8x64& input) { + return input.reduce_or().is_ascii(); +} + +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. + return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80 + return is_third_byte | is_fourth_byte; +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +/** + * We provide a custom version of bit_indexer::write using + * naked intrinsics. + * TODO: make this code more elegant. + */ +// Under GCC 12, the intrinsic _mm512_extracti32x4_epi32 may generate 'maybe uninitialized'. +// as a workaround, we disable warnings within the following function. +SIMDJSON_PUSH_DISABLE_ALL_WARNINGS +namespace simdjson { namespace icelake { namespace { namespace stage1 { +simdjson_inline void bit_indexer::write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) { return; } + + const __m512i indexes = _mm512_maskz_compress_epi8(bits, _mm512_set_epi32( + 0x3f3e3d3c, 0x3b3a3938, 0x37363534, 0x33323130, + 0x2f2e2d2c, 0x2b2a2928, 0x27262524, 0x23222120, + 0x1f1e1d1c, 0x1b1a1918, 0x17161514, 0x13121110, + 0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100 + )); + const __m512i start_index = _mm512_set1_epi32(idx); + + const auto count = count_ones(bits); + __m512i t0 = _mm512_cvtepu8_epi32(_mm512_castsi512_si128(indexes)); + _mm512_storeu_si512(this->tail, _mm512_add_epi32(t0, start_index)); + + if(count > 16) { + const __m512i t1 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 1)); + _mm512_storeu_si512(this->tail + 16, _mm512_add_epi32(t1, start_index)); + if(count > 32) { + const __m512i t2 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 2)); + _mm512_storeu_si512(this->tail + 32, _mm512_add_epi32(t2, start_index)); + if(count > 48) { + const __m512i t3 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 3)); + _mm512_storeu_si512(this->tail + 48, _mm512_add_epi32(t3, start_index)); + } + } + } + this->tail += count; +} +}}}} +SIMDJSON_POP_DISABLE_WARNINGS + +// +// Stage 2 +// + +// +// Implementation-specific overrides +// +namespace simdjson { +namespace icelake { + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return icelake::stage1::json_minifier::minify<128>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return icelake::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return icelake::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { + return icelake::stringparsing::parse_string(src, dst, replacement_char); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return icelake::stringparsing::parse_wobbly_string(src, dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace icelake +} // namespace simdjson + +/* including simdjson/icelake/end.h: #include */ +/* begin file simdjson/icelake/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "icelake" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/icelake/end.h */ + +#endif // SIMDJSON_SRC_ICELAKE_CPP +/* end file icelake.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_PPC64 +/* including ppc64.cpp: #include */ +/* begin file ppc64.cpp */ +#ifndef SIMDJSON_SRC_PPC64_CPP +#define SIMDJSON_SRC_PPC64_CPP + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* including simdjson/ppc64.h: #include */ +/* begin file simdjson/ppc64.h */ +#ifndef SIMDJSON_PPC64_H +#define SIMDJSON_PPC64_H + +/* including simdjson/ppc64/begin.h: #include "simdjson/ppc64/begin.h" */ +/* begin file simdjson/ppc64/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "ppc64" */ +#define SIMDJSON_IMPLEMENTATION ppc64 +/* including simdjson/ppc64/base.h: #include "simdjson/ppc64/base.h" */ +/* begin file simdjson/ppc64/base.h */ +#ifndef SIMDJSON_PPC64_BASE_H +#define SIMDJSON_PPC64_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for ALTIVEC (PPC64). + */ +namespace ppc64 { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BASE_H +/* end file simdjson/ppc64/base.h */ +/* including simdjson/ppc64/intrinsics.h: #include "simdjson/ppc64/intrinsics.h" */ +/* begin file simdjson/ppc64/intrinsics.h */ +#ifndef SIMDJSON_PPC64_INTRINSICS_H +#define SIMDJSON_PPC64_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +// These are defined by altivec.h in GCC toolchain, it is safe to undef them. +#ifdef bool +#undef bool +#endif + +#ifdef vector +#undef vector +#endif + +static_assert(sizeof(__vector unsigned char) <= simdjson::SIMDJSON_PADDING, "insufficient padding for ppc64"); + +#endif // SIMDJSON_PPC64_INTRINSICS_H +/* end file simdjson/ppc64/intrinsics.h */ +/* including simdjson/ppc64/bitmanipulation.h: #include "simdjson/ppc64/bitmanipulation.h" */ +/* begin file simdjson/ppc64/bitmanipulation.h */ +#ifndef SIMDJSON_PPC64_BITMANIPULATION_H +#define SIMDJSON_PPC64_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num - 1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline int count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdjson_inline int count_ones(uint64_t input_num) { + return __builtin_popcountll(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BITMANIPULATION_H +/* end file simdjson/ppc64/bitmanipulation.h */ +/* including simdjson/ppc64/bitmask.h: #include "simdjson/ppc64/bitmask.h" */ +/* begin file simdjson/ppc64/bitmask.h */ +#ifndef SIMDJSON_PPC64_BITMASK_H +#define SIMDJSON_PPC64_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is +// encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + // You can use the version below, however gcc sometimes miscompiles + // vec_pmsum_be, it happens somewhere around between 8 and 9th version. + // The performance boost was not noticeable, falling back to a usual + // implementation. + // __vector unsigned long long all_ones = {~0ull, ~0ull}; + // __vector unsigned long long mask = {bitmask, 0}; + // // Clang and GCC return different values for pmsum for ull so cast it to one. + // // Generally it is not specified by ALTIVEC ISA what is returned by + // // vec_pmsum_be. + // #if defined(__LITTLE_ENDIAN__) + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); + // #else + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); + // #endif + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif +/* end file simdjson/ppc64/bitmask.h */ +/* including simdjson/ppc64/numberparsing_defs.h: #include "simdjson/ppc64/numberparsing_defs.h" */ +/* begin file simdjson/ppc64/numberparsing_defs.h */ +#ifndef SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +#define SIMDJSON_PPC64_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#if defined(__linux__) +#include +#elif defined(__FreeBSD__) +#include +#endif + +namespace simdjson { +namespace ppc64 { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); +#ifdef __BIG_ENDIAN__ +#if defined(__linux__) + val = bswap_64(val); +#elif defined(__FreeBSD__) + val = bswap64(val); +#endif +#endif + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace ppc64 +} // namespace simdjson + +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 +#else +#define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif +#endif + +#endif // SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +/* end file simdjson/ppc64/numberparsing_defs.h */ +/* including simdjson/ppc64/simd.h: #include "simdjson/ppc64/simd.h" */ +/* begin file simdjson/ppc64/simd.h */ +#ifndef SIMDJSON_PPC64_SIMD_H +#define SIMDJSON_PPC64_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace ppc64 { +namespace { +namespace simd { + +using __m128i = __vector unsigned char; + +template struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i &() const { + return this->value; + } + simdjson_inline operator __m128i &() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { + return vec_or(this->value, (__m128i)other); + } + simdjson_inline Child operator&(const Child other) const { + return vec_and(this->value, (__m128i)other); + } + simdjson_inline Child operator^(const Child other) const { + return vec_xor(this->value, (__m128i)other); + } + simdjson_inline Child bit_andnot(const Child other) const { + return vec_andc(this->value, (__m128i)other); + } + simdjson_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdjson_inline Child &operator&=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdjson_inline Child &operator^=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } +}; + +template > +struct base8 : base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { + return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); + } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(simd8 prev_chunk) const { + __m128i chunk = this->value; +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve(this->value); + prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); +#endif + chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve((__m128i)chunk); +#endif + return chunk; + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdjson_inline simd8 splat(bool _value) { + return (__m128i)vec_splats((unsigned char)(-(!!_value))); + } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) + : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) + : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { + __vector unsigned long long result; + const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, + 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; + + result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, + (__m128i)perm_mask)); +#ifdef __LITTLE_ENDIAN__ + return static_cast(result[1]); +#else + return static_cast(result[0]); +#endif + } + simdjson_inline bool any() const { + return !vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline simd8 operator~() const { + return this->value ^ (__m128i)splat(true); + } +}; + +template struct base8_numeric : base8 { + static simdjson_inline simd8 splat(T value) { + (void)value; + return (__m128i)vec_splats(value); + } + static simdjson_inline simd8 zero() { return splat(0); } + static simdjson_inline simd8 load(const T values[16]) { + return (__m128i)(vec_vsx_ld(0, reinterpret_cast(values))); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) + : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { + vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); + } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { + return (__m128i)((__m128i)this->value + (__m128i)other); + } + simdjson_inline simd8 operator-(const simd8 other) const { + return (__m128i)((__m128i)this->value - (__m128i)other); + } + simdjson_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdjson_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *static_cast *>(this); + } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted + // as a bitset). Passing a 0 value for mask would be equivalent to writing out + // every byte to output. Only the first 16 - count_ones(mask) bytes of the + // result are significant but 16 bytes get written. Design consideration: it + // seems like a function with the signature simd8 compress(uint32_t mask) + // would be sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L *output) const { + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + using internal::thintable_epi8; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. +#ifdef __LITTLE_ENDIAN__ + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask1], thintable_epi8[mask2]}; +#else + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask2], thintable_epi8[mask1]}; + shufmask = (__m128i)vec_reve((__m128i)shufmask); +#endif + // we increment by 0x08 the second half of the mask + shufmask = ((__m128i)shufmask) + + ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); + + // this is the version "nearly pruned" + __m128i pruned = vec_perm(this->value, this->value, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + vec_vsx_ld(0, reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); + vec_vsx_st(answer, 0, reinterpret_cast<__m128i *>(output)); + } + + template + simdjson_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, + v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Order-sensitive comparisons + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return (__m128i)vec_cmpgt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return (__m128i)vec_cmplt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Saturated math + simdjson_inline simd8 + saturating_add(const simd8 other) const { + return (__m128i)vec_adds(this->value, (__m128i)other); + } + simdjson_inline simd8 + saturating_sub(const simd8 other) const { + return (__m128i)vec_subs(this->value, (__m128i)other); + } + + // Order-specific operations + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max(this->value, (__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min(this->value, (__m128i)other); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 + lt_bits(const simd8 other) const { + return other.saturating_sub(*this); + } + simdjson_inline simd8 + operator<=(const simd8 other) const { + return other.max_val(*this) == other; + } + simdjson_inline simd8 + operator>=(const simd8 other) const { + return other.min_val(*this) == other; + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { + return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); + } + simdjson_inline simd8 bits_not_set(simd8 bits) const { + return (*this & bits).bits_not_set(); + } + simdjson_inline simd8 any_bits_set() const { + return ~this->bits_not_set(); + } + simdjson_inline simd8 any_bits_set(simd8 bits) const { + return ~this->bits_not_set(bits); + } + simdjson_inline bool bits_not_set_anywhere() const { + return vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + return vec_all_eq(vec_and(this->value, (__m128i)bits), + (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { + return !bits_not_set_anywhere(bits); + } + template simdjson_inline simd8 shr() const { + return simd8( + (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); + } + template simdjson_inline simd8 shl() const { + return simd8( + (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); + } +}; + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, + "PPC64 kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) + : chunks{simd8::load(ptr), simd8::load(ptr + 16), + simd8::load(ptr + 32), simd8::load(ptr + 48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0); + this->chunks[1].store(ptr + sizeof(simd8) * 1); + this->chunks[2].store(ptr + sizeof(simd8) * 2); + this->chunks[3].store(ptr + sizeof(simd8) * 3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T *output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), + output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), + output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), + output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3]) + .to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } +}; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_SIMD_INPUT_H +/* end file simdjson/ppc64/simd.h */ +/* including simdjson/ppc64/stringparsing_defs.h: #include "simdjson/ppc64/stringparsing_defs.h" */ +/* begin file simdjson/ppc64/stringparsing_defs.h */ +#ifndef SIMDJSON_PPC64_STRINGPARSING_DEFS_H +#define SIMDJSON_PPC64_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/simd.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote + copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { + return ((bs_bits - 1) & quote_bits) != 0; + } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { + return trailing_zeroes(quote_bits); + } + simdjson_inline int backslash_index() { + return trailing_zeroes(bs_bits); + } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote +backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), + "backslash and quote finder must process fewer than " + "SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on + // PPC; therefore, we smash them together into a 64-byte mask and get the + // bitmask from there. + uint64_t bs_and_quote = + simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_STRINGPARSING_DEFS_H +/* end file simdjson/ppc64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/ppc64/begin.h */ +/* including simdjson/generic/amalgamated.h for ppc64: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for ppc64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for ppc64: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for ppc64 */ +/* including simdjson/generic/jsoncharutils.h for ppc64: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for ppc64 */ +/* including simdjson/generic/atomparsing.h for ppc64: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace ppc64 { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for ppc64 */ +/* including simdjson/generic/dom_parser_implementation.h for ppc64: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { +namespace ppc64 { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for ppc64 */ +/* including simdjson/generic/implementation_simdjson_result_base.h for ppc64: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for ppc64 */ +/* including simdjson/generic/numberparsing.h for ppc64: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace ppc64 { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximately equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). + static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for ppc64 */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64 */ +/* end file simdjson/generic/amalgamated.h for ppc64 */ +/* including simdjson/ppc64/end.h: #include "simdjson/ppc64/end.h" */ +/* begin file simdjson/ppc64/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "ppc64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/ppc64/end.h */ + +#endif // SIMDJSON_PPC64_H +/* end file simdjson/ppc64.h */ +/* including simdjson/ppc64/implementation.h: #include */ +/* begin file simdjson/ppc64/implementation.h */ +#ifndef SIMDJSON_PPC64_IMPLEMENTATION_H +#define SIMDJSON_PPC64_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +/** + * Implementation for ALTIVEC (PPC64). + */ +namespace ppc64 { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() + : simdjson::implementation("ppc64", "PPC64 ALTIVEC", + internal::instruction_set::ALTIVEC) {} + + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, size_t max_length, + std::unique_ptr &dst) + const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, + uint8_t *dst, + size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +}; + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_IMPLEMENTATION_H +/* end file simdjson/ppc64/implementation.h */ + +/* including simdjson/ppc64/begin.h: #include */ +/* begin file simdjson/ppc64/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "ppc64" */ +#define SIMDJSON_IMPLEMENTATION ppc64 +/* including simdjson/ppc64/base.h: #include "simdjson/ppc64/base.h" */ +/* begin file simdjson/ppc64/base.h */ +#ifndef SIMDJSON_PPC64_BASE_H +#define SIMDJSON_PPC64_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for ALTIVEC (PPC64). + */ +namespace ppc64 { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BASE_H +/* end file simdjson/ppc64/base.h */ +/* including simdjson/ppc64/intrinsics.h: #include "simdjson/ppc64/intrinsics.h" */ +/* begin file simdjson/ppc64/intrinsics.h */ +#ifndef SIMDJSON_PPC64_INTRINSICS_H +#define SIMDJSON_PPC64_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +// These are defined by altivec.h in GCC toolchain, it is safe to undef them. +#ifdef bool +#undef bool +#endif + +#ifdef vector +#undef vector +#endif + +static_assert(sizeof(__vector unsigned char) <= simdjson::SIMDJSON_PADDING, "insufficient padding for ppc64"); + +#endif // SIMDJSON_PPC64_INTRINSICS_H +/* end file simdjson/ppc64/intrinsics.h */ +/* including simdjson/ppc64/bitmanipulation.h: #include "simdjson/ppc64/bitmanipulation.h" */ +/* begin file simdjson/ppc64/bitmanipulation.h */ +#ifndef SIMDJSON_PPC64_BITMANIPULATION_H +#define SIMDJSON_PPC64_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num - 1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline int count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdjson_inline int count_ones(uint64_t input_num) { + return __builtin_popcountll(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BITMANIPULATION_H +/* end file simdjson/ppc64/bitmanipulation.h */ +/* including simdjson/ppc64/bitmask.h: #include "simdjson/ppc64/bitmask.h" */ +/* begin file simdjson/ppc64/bitmask.h */ +#ifndef SIMDJSON_PPC64_BITMASK_H +#define SIMDJSON_PPC64_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is +// encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + // You can use the version below, however gcc sometimes miscompiles + // vec_pmsum_be, it happens somewhere around between 8 and 9th version. + // The performance boost was not noticeable, falling back to a usual + // implementation. + // __vector unsigned long long all_ones = {~0ull, ~0ull}; + // __vector unsigned long long mask = {bitmask, 0}; + // // Clang and GCC return different values for pmsum for ull so cast it to one. + // // Generally it is not specified by ALTIVEC ISA what is returned by + // // vec_pmsum_be. + // #if defined(__LITTLE_ENDIAN__) + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); + // #else + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); + // #endif + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif +/* end file simdjson/ppc64/bitmask.h */ +/* including simdjson/ppc64/numberparsing_defs.h: #include "simdjson/ppc64/numberparsing_defs.h" */ +/* begin file simdjson/ppc64/numberparsing_defs.h */ +#ifndef SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +#define SIMDJSON_PPC64_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#if defined(__linux__) +#include +#elif defined(__FreeBSD__) +#include +#endif + +namespace simdjson { +namespace ppc64 { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); +#ifdef __BIG_ENDIAN__ +#if defined(__linux__) + val = bswap_64(val); +#elif defined(__FreeBSD__) + val = bswap64(val); +#endif +#endif + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace ppc64 +} // namespace simdjson + +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 +#else +#define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif +#endif + +#endif // SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +/* end file simdjson/ppc64/numberparsing_defs.h */ +/* including simdjson/ppc64/simd.h: #include "simdjson/ppc64/simd.h" */ +/* begin file simdjson/ppc64/simd.h */ +#ifndef SIMDJSON_PPC64_SIMD_H +#define SIMDJSON_PPC64_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace ppc64 { +namespace { +namespace simd { + +using __m128i = __vector unsigned char; + +template struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i &() const { + return this->value; + } + simdjson_inline operator __m128i &() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { + return vec_or(this->value, (__m128i)other); + } + simdjson_inline Child operator&(const Child other) const { + return vec_and(this->value, (__m128i)other); + } + simdjson_inline Child operator^(const Child other) const { + return vec_xor(this->value, (__m128i)other); + } + simdjson_inline Child bit_andnot(const Child other) const { + return vec_andc(this->value, (__m128i)other); + } + simdjson_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdjson_inline Child &operator&=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdjson_inline Child &operator^=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } +}; + +template > +struct base8 : base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { + return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); + } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(simd8 prev_chunk) const { + __m128i chunk = this->value; +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve(this->value); + prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); +#endif + chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve((__m128i)chunk); +#endif + return chunk; + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdjson_inline simd8 splat(bool _value) { + return (__m128i)vec_splats((unsigned char)(-(!!_value))); + } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) + : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) + : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { + __vector unsigned long long result; + const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, + 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; + + result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, + (__m128i)perm_mask)); +#ifdef __LITTLE_ENDIAN__ + return static_cast(result[1]); +#else + return static_cast(result[0]); +#endif + } + simdjson_inline bool any() const { + return !vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline simd8 operator~() const { + return this->value ^ (__m128i)splat(true); + } +}; + +template struct base8_numeric : base8 { + static simdjson_inline simd8 splat(T value) { + (void)value; + return (__m128i)vec_splats(value); + } + static simdjson_inline simd8 zero() { return splat(0); } + static simdjson_inline simd8 load(const T values[16]) { + return (__m128i)(vec_vsx_ld(0, reinterpret_cast(values))); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) + : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { + vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); + } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { + return (__m128i)((__m128i)this->value + (__m128i)other); + } + simdjson_inline simd8 operator-(const simd8 other) const { + return (__m128i)((__m128i)this->value - (__m128i)other); + } + simdjson_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdjson_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *static_cast *>(this); + } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted + // as a bitset). Passing a 0 value for mask would be equivalent to writing out + // every byte to output. Only the first 16 - count_ones(mask) bytes of the + // result are significant but 16 bytes get written. Design consideration: it + // seems like a function with the signature simd8 compress(uint32_t mask) + // would be sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L *output) const { + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + using internal::thintable_epi8; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. +#ifdef __LITTLE_ENDIAN__ + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask1], thintable_epi8[mask2]}; +#else + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask2], thintable_epi8[mask1]}; + shufmask = (__m128i)vec_reve((__m128i)shufmask); +#endif + // we increment by 0x08 the second half of the mask + shufmask = ((__m128i)shufmask) + + ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); + + // this is the version "nearly pruned" + __m128i pruned = vec_perm(this->value, this->value, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + vec_vsx_ld(0, reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); + vec_vsx_st(answer, 0, reinterpret_cast<__m128i *>(output)); + } + + template + simdjson_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, + v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Order-sensitive comparisons + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return (__m128i)vec_cmpgt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return (__m128i)vec_cmplt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Saturated math + simdjson_inline simd8 + saturating_add(const simd8 other) const { + return (__m128i)vec_adds(this->value, (__m128i)other); + } + simdjson_inline simd8 + saturating_sub(const simd8 other) const { + return (__m128i)vec_subs(this->value, (__m128i)other); + } + + // Order-specific operations + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max(this->value, (__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min(this->value, (__m128i)other); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 + lt_bits(const simd8 other) const { + return other.saturating_sub(*this); + } + simdjson_inline simd8 + operator<=(const simd8 other) const { + return other.max_val(*this) == other; + } + simdjson_inline simd8 + operator>=(const simd8 other) const { + return other.min_val(*this) == other; + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { + return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); + } + simdjson_inline simd8 bits_not_set(simd8 bits) const { + return (*this & bits).bits_not_set(); + } + simdjson_inline simd8 any_bits_set() const { + return ~this->bits_not_set(); + } + simdjson_inline simd8 any_bits_set(simd8 bits) const { + return ~this->bits_not_set(bits); + } + simdjson_inline bool bits_not_set_anywhere() const { + return vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + return vec_all_eq(vec_and(this->value, (__m128i)bits), + (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { + return !bits_not_set_anywhere(bits); + } + template simdjson_inline simd8 shr() const { + return simd8( + (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); + } + template simdjson_inline simd8 shl() const { + return simd8( + (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); + } +}; + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, + "PPC64 kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) + : chunks{simd8::load(ptr), simd8::load(ptr + 16), + simd8::load(ptr + 32), simd8::load(ptr + 48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0); + this->chunks[1].store(ptr + sizeof(simd8) * 1); + this->chunks[2].store(ptr + sizeof(simd8) * 2); + this->chunks[3].store(ptr + sizeof(simd8) * 3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T *output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), + output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), + output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), + output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3]) + .to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } +}; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_SIMD_INPUT_H +/* end file simdjson/ppc64/simd.h */ +/* including simdjson/ppc64/stringparsing_defs.h: #include "simdjson/ppc64/stringparsing_defs.h" */ +/* begin file simdjson/ppc64/stringparsing_defs.h */ +#ifndef SIMDJSON_PPC64_STRINGPARSING_DEFS_H +#define SIMDJSON_PPC64_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/simd.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote + copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { + return ((bs_bits - 1) & quote_bits) != 0; + } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { + return trailing_zeroes(quote_bits); + } + simdjson_inline int backslash_index() { + return trailing_zeroes(bs_bits); + } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote +backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), + "backslash and quote finder must process fewer than " + "SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on + // PPC; therefore, we smash them together into a 64-byte mask and get the + // bitmask from there. + uint64_t bs_and_quote = + simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_STRINGPARSING_DEFS_H +/* end file simdjson/ppc64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/ppc64/begin.h */ +/* including generic/amalgamated.h for ppc64: #include */ +/* begin file generic/amalgamated.h for ppc64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) +#error generic/dependencies.h must be included before generic/amalgamated.h! +#endif + +/* including generic/base.h for ppc64: #include */ +/* begin file generic/base.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +struct json_character_block; + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_BASE_H +/* end file generic/base.h for ppc64 */ +/* including generic/dom_parser_implementation.h for ppc64: #include */ +/* begin file generic/dom_parser_implementation.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Interface a dom parser implementation must fulfill +namespace simdjson { +namespace ppc64 { +namespace { + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); +simdjson_inline bool is_ascii(const simd8x64& input); + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file generic/dom_parser_implementation.h for ppc64 */ +/* including generic/json_character_block.h for ppc64: #include */ +/* begin file generic/json_character_block.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); + + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + + uint64_t _whitespace; + uint64_t _op; +}; + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +/* end file generic/json_character_block.h for ppc64 */ +/* end file generic/amalgamated.h for ppc64 */ +/* including generic/stage1/amalgamated.h for ppc64: #include */ +/* begin file generic/stage1/amalgamated.h for ppc64 */ +// Stuff other things depend on +/* including generic/stage1/base.h for ppc64: #include */ +/* begin file generic/stage1/base.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +class bit_indexer; +template +struct buf_block_reader; +struct json_block; +class json_minifier; +class json_scanner; +struct json_string_block; +class json_string_scanner; +class json_structural_indexer; + +} // namespace stage1 + +namespace utf8_validation { +struct utf8_checker; +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H +/* end file generic/stage1/base.h for ppc64 */ +/* including generic/stage1/buf_block_reader.h for ppc64: #include */ +/* begin file generic/stage1/buf_block_reader.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] <= ' ') { buf[i] = '_'; } + if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +/* end file generic/stage1/buf_block_reader.h for ppc64 */ +/* including generic/stage1/json_escape_scanner.h for ppc64: #include */ +/* begin file generic/stage1/json_escape_scanner.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +/** + * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). + */ +struct json_escape_scanner { + /** The actual escape characters (the backslashes themselves). */ + uint64_t next_is_escaped = 0ULL; + + struct escaped_and_escape { + /** + * Mask of escaped characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 0100100010100101000 + * n \ \ n \ \ + * ``` + */ + uint64_t escaped; + /** + * Mask of escape characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 1001000101001010001 + * \ \ \ \ \ \ \ + * ``` + */ + uint64_t escape; + }; + + /** + * Get a mask of both escape and escaped characters (the characters following a backslash). + * + * @param potential_escape A mask of the character that can escape others (but could be + * escaped itself). e.g. block.eq('\\') + */ + simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { + +#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT + if (!backslash) { return {next_escaped_without_backslashes(), 0}; } +#endif + + // | | Mask (shows characters instead of 1's) | Depth | Instructions | + // |--------------------------------|----------------------------------------|-------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | + // | | ` even odd even odd odd` | | | + // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) + // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) + // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) + // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () + // (*) this is not needed until the next iteration + uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); + uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); + uint64_t escape = escape_and_terminal_code & backslash; + this->next_is_escaped = escape >> 63; + return {escaped, escape}; + } + +private: + static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; + + simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { + uint64_t escaped = this->next_is_escaped; + this->next_is_escaped = 0; + return escaped; + } + + /** + * Returns a mask of the next escape characters (masking out escaped backslashes), along with + * any non-backslash escape codes. + * + * \n \\n \\\n \\\\n returns: + * \n \ \ \n \ \ + * 11 100 1011 10100 + * + * You are expected to mask out the first bit yourself if the previous block had a trailing + * escape. + * + * & the result with potential_escape to get just the escape characters. + * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. + */ + static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { + // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: + // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be + // inverted (\\\ would be 010 instead of 101). + // + // ``` + // string: | ____\\\\_\\\\_____ | + // maybe_escaped | ODD | \ \ \ \ | + // even-aligned ^^^ ^^^^ odd-aligned + // ``` + // + // Taking that into account, our basic strategy is: + // + // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for + // odd-aligned runs. + // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the + // odd bits in odd-aligned runs. + // 3. & with backslash to clean up any stray bits. + // runs are set to 0, and then XORing with "odd": + // + // | | Mask (shows characters instead of 1's) | Instructions | + // |--------------------------------|----------------------------------------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | + // | | ` even odd even odd odd` | + // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) + // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) + // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) + // + + // Escaped characters are characters following an escape. + uint64_t maybe_escaped = potential_escape << 1; + + // To distinguish odd from even escape sequences, therefore, we turn on any *starting* + // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) + // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. + // - Odd runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. + // - All other odd bytes are 1, and even bytes are 0. + uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; + uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; + + // Now we flip all odd bytes back with xor. This: + // - Makes odd runs of backslashes go from 0000 to 1010 + // - Makes even runs of backslashes go from 1111 to 1010 + // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) + // - Resets all other bytes to 0 + return even_series_codes_and_odd_bits ^ ODD_BITS; + } +}; + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_escape_scanner.h for ppc64 */ +/* including generic/stage1/json_string_scanner.h for ppc64: #include */ +/* begin file generic/stage1/json_string_scanner.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : + _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-escaped ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Scans for escape characters + json_escape_scanner escape_scanner{}; + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; +}; + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. +// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = escape_scanner.next(backslash).escaped; + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block(escaped, quote, in_string); +} + +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_string_scanner.h for ppc64 */ +/* including generic/stage1/utf8_lookup4_algorithm.h for ppc64: #include */ +/* begin file generic/stage1/utf8_lookup4_algorithm.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = must_be_2_3_continuation(prev2, prev3); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. + // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. + simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +/* end file generic/stage1/utf8_lookup4_algorithm.h for ppc64 */ +/* including generic/stage1/json_scanner.h for ppc64: #include */ +/* begin file generic/stage1/json_scanner.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. + */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. + */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. + characters, + follows_nonquote_scalar + ); +} + +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +/* end file generic/stage1/json_scanner.h for ppc64 */ + +// All other declarations +/* including generic/stage1/find_next_document_index.h for ppc64: #include */ +/* begin file generic/stage1/find_next_document_index.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for ppc64 */ +/* including generic/stage1/json_minifier.h for ppc64: #include */ +/* begin file generic/stage1/json_minifier.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; +}; + +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} + +template<> +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +/* end file generic/stage1/json_minifier.h for ppc64 */ +/* including generic/stage1/json_structural_indexer.h for ppc64: #include */ +/* begin file generic/stage1/json_structural_indexer.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + +#if SIMDJSON_PREFER_REVERSE_BITS + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. + * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + simdjson_inline void write_index(uint32_t idx, uint64_t& rev_bits, int i) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } +#else + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ + + simdjson_inline void write_index(uint32_t idx, uint64_t& bits, int i) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } +#endif // SIMDJSON_PREFER_REVERSE_BITS + + template + simdjson_inline int write_indexes(uint32_t idx, uint64_t& bits) { + write_index(idx, bits, START); + SIMDJSON_IF_CONSTEXPR (N > 1) { + write_indexes<(N-1>0?START+1:START), (N-1>=0?N-1:1)>(idx, bits); + } + return START+N; + } + + template + simdjson_inline int write_indexes_stepped(uint32_t idx, uint64_t& bits, int cnt) { + write_indexes(idx, bits); + SIMDJSON_IF_CONSTEXPR ((START+STEP) < END) { + if (simdjson_unlikely((START+STEP) < cnt)) { + write_indexes_stepped<(START+STEP(idx, bits, cnt); + } + } + return ((END-START) % STEP) == 0 ? END : (END-START) - ((END-START) % STEP) + STEP; + } + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it + // will provide its own version of the code. +#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; + + int cnt = static_cast(count_ones(bits)); + +#if SIMDJSON_PREFER_REVERSE_BITS + bits = reverse_bits(bits); +#endif +#ifdef SIMDJSON_STRUCTURAL_INDEXER_STEP + static constexpr const int STEP = SIMDJSON_STRUCTURAL_INDEXER_STEP; +#else + static constexpr const int STEP = 4; +#endif + static constexpr const int STEP_UNTIL = 24; + + write_indexes_stepped<0, STEP_UNTIL, STEP>(idx, bits, cnt); + SIMDJSON_IF_CONSTEXPR (STEP_UNTIL < 64) { + if (simdjson_unlikely(STEP_UNTIL < cnt)) { + for (int i=STEP_UNTIL; itail += cnt; + } +#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. + */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 does not use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) + uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION + checker.check_next_input(in); +#endif + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? + ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On-Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. + * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. +#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +/* end file generic/stage1/json_structural_indexer.h for ppc64 */ +/* including generic/stage1/utf8_validator.h for ppc64: #include */ +/* begin file generic/stage1/utf8_validator.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +/* end file generic/stage1/utf8_validator.h for ppc64 */ +/* end file generic/stage1/amalgamated.h for ppc64 */ +/* including generic/stage2/amalgamated.h for ppc64: #include */ +/* begin file generic/stage2/amalgamated.h for ppc64 */ +// Stuff other things depend on +/* including generic/stage2/base.h for ppc64: #include */ +/* begin file generic/stage2/base.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage2 { + +class json_iterator; +class structural_iterator; +struct tape_builder; +struct tape_writer; + +} // namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +/* end file generic/stage2/base.h for ppc64 */ +/* including generic/stage2/tape_writer.h for ppc64: #include */ +/* begin file generic/stage2/tape_writer.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. */ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for ppc64 */ +/* including generic/stage2/logger.h for ppc64: #include */ +/* begin file generic/stage2/logger.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + + +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace ppc64 { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. + + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i */ +/* begin file generic/stage2/json_iterator.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage2 { + +class json_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + uint32_t depth{0}; + + /** + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_end() - at the start `]` of a non-empty object. + * - visit_object_start() - at the end `]` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. + * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. + */ + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + // Use the fact that most scalars are going to be either strings or numbers. + if(*value == '"') { + return visitor.visit_string(*this, value); + } else if (((*value - '0') < 10) || (*value == '-')) { + return visitor.visit_number(*this, value); + } + // true, false, null are uncommon. + switch (*value) { + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for ppc64 */ +/* including generic/stage2/stringparsing.h for ppc64: #include */ +/* begin file generic/stage2/stringparsing.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace ppc64 { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_ptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for ppc64 */ +/* including generic/stage2/structural_iterator.h for ppc64: #include */ +/* begin file generic/stage2/structural_iterator.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage2 { + +class structural_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + + // Start a structural + simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { + } + // Get the buffer position of the current structural character + simdjson_inline const uint8_t* current() { + return &buf[*(next_structural-1)]; + } + // Get the current structural character + simdjson_inline char current_char() { + return buf[*(next_structural-1)]; + } + // Get the next structural character without advancing + simdjson_inline char peek_next_char() { + return buf[*next_structural]; + } + simdjson_inline const uint8_t* peek() { + return &buf[*next_structural]; + } + simdjson_inline const uint8_t* advance() { + return &buf[*(next_structural++)]; + } + simdjson_inline char advance_char() { + return buf[*(next_structural++)]; + } + simdjson_inline size_t remaining_len() { + return dom_parser.len - *(next_structural-1); + } + + simdjson_inline bool at_end() { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; + } + simdjson_inline bool at_beginning() { + return next_structural == dom_parser.structural_indexes.get(); + } +}; + +} // namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +/* end file generic/stage2/structural_iterator.h for ppc64 */ +/* including generic/stage2/tape_builder.h for ppc64: #include */ +/* begin file generic/stage2/tape_builder.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. + * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. + */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. */ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +/* end file generic/stage2/tape_builder.h for ppc64 */ +/* end file generic/stage2/amalgamated.h for ppc64 */ + +// +// Stage 1 +// +namespace simdjson { +namespace ppc64 { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +namespace { + +using namespace simd; + +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + const simd8 table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); + const simd8 table2(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); + + simd8x64 v( + (in.chunks[0] & 0xf).lookup_16(table1) & (in.chunks[0].shr<4>()).lookup_16(table2), + (in.chunks[1] & 0xf).lookup_16(table1) & (in.chunks[1].shr<4>()).lookup_16(table2), + (in.chunks[2] & 0xf).lookup_16(table1) & (in.chunks[2].shr<4>()).lookup_16(table2), + (in.chunks[3] & 0xf).lookup_16(table1) & (in.chunks[3].shr<4>()).lookup_16(table2) + ); + + uint64_t op = simd8x64( + v.chunks[0].any_bits_set(0x7), + v.chunks[1].any_bits_set(0x7), + v.chunks[2].any_bits_set(0x7), + v.chunks[3].any_bits_set(0x7) + ).to_bitmask(); + + uint64_t whitespace = simd8x64( + v.chunks[0].any_bits_set(0x18), + v.chunks[1].any_bits_set(0x18), + v.chunks[2].any_bits_set(0x18), + v.chunks[3].any_bits_set(0x18) + ).to_bitmask(); + + return { whitespace, op }; +} + +simdjson_inline bool is_ascii(const simd8x64& input) { + // careful: 0x80 is not ascii. + return input.reduce_or().saturating_sub(0x7fu).bits_not_set_anywhere(); +} + +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. + return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80 + return is_third_byte | is_fourth_byte; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +// +// Stage 2 +// + +// +// Implementation-specific overrides +// +namespace simdjson { +namespace ppc64 { + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return ppc64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return ppc64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return ppc64::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { + return ppc64::stringparsing::parse_string(src, dst, replacement_char); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return ppc64::stringparsing::parse_wobbly_string(src, dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace ppc64 +} // namespace simdjson + +/* including simdjson/ppc64/end.h: #include */ +/* begin file simdjson/ppc64/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "ppc64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/ppc64/end.h */ + +#endif // SIMDJSON_SRC_PPC64_CPP +/* end file ppc64.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_WESTMERE +/* including westmere.cpp: #include */ +/* begin file westmere.cpp */ +#ifndef SIMDJSON_SRC_WESTMERE_CPP +#define SIMDJSON_SRC_WESTMERE_CPP + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* including simdjson/westmere.h: #include */ +/* begin file simdjson/westmere.h */ +#ifndef SIMDJSON_WESTMERE_H +#define SIMDJSON_WESTMERE_H + +/* including simdjson/westmere/begin.h: #include "simdjson/westmere/begin.h" */ +/* begin file simdjson/westmere/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "westmere" */ +#define SIMDJSON_IMPLEMENTATION westmere +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). + */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_TARGET_REGION("sse4.2,pclmul,popcnt") +#endif + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/bitmask.h: #include "simdjson/westmere/bitmask.h" */ +/* begin file simdjson/westmere/bitmask.h */ +#ifndef SIMDJSON_WESTMERE_BITMASK_H +#define SIMDJSON_WESTMERE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processing supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMASK_H +/* end file simdjson/westmere/bitmask.h */ +/* including simdjson/westmere/numberparsing_defs.h: #include "simdjson/westmere/numberparsing_defs.h" */ +/* begin file simdjson/westmere/numberparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +#define SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H + +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). + */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace westmere +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +/* end file simdjson/westmere/numberparsing_defs.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ +/* including simdjson/westmere/stringparsing_defs.h: #include "simdjson/westmere/stringparsing_defs.h" */ +/* begin file simdjson/westmere/stringparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +#define SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ + +namespace simdjson { +namespace westmere { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + 16); + v0.store(dst); + v1.store(dst + 16); + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +/* end file simdjson/westmere/stringparsing_defs.h */ +/* end file simdjson/westmere/begin.h */ +/* including simdjson/generic/amalgamated.h for westmere: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for westmere */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for westmere: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for westmere */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for westmere */ +/* including simdjson/generic/jsoncharutils.h for westmere: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for westmere */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for westmere */ +/* including simdjson/generic/atomparsing.h for westmere: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for westmere */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace westmere { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for westmere */ +/* including simdjson/generic/dom_parser_implementation.h for westmere: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for westmere */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace westmere +} // namespace simdjson + +namespace simdjson { +namespace westmere { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for westmere */ +/* including simdjson/generic/implementation_simdjson_result_base.h for westmere: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for westmere */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for westmere */ +/* including simdjson/generic/numberparsing.h for westmere: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for westmere */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace westmere { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximately equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). + static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for westmere */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for westmere: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for westmere */ +/* end file simdjson/generic/amalgamated.h for westmere */ +/* including simdjson/westmere/end.h: #include "simdjson/westmere/end.h" */ +/* begin file simdjson/westmere/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "westmere" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/westmere/end.h */ + +#endif // SIMDJSON_WESTMERE_H +/* end file simdjson/westmere.h */ +/* including simdjson/westmere/implementation.h: #include */ +/* begin file simdjson/westmere/implementation.h */ +#ifndef SIMDJSON_WESTMERE_IMPLEMENTATION_H +#define SIMDJSON_WESTMERE_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +namespace westmere { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H +/* end file simdjson/westmere/implementation.h */ + +/* including simdjson/westmere/begin.h: #include */ +/* begin file simdjson/westmere/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "westmere" */ +#define SIMDJSON_IMPLEMENTATION westmere +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). + */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_TARGET_REGION("sse4.2,pclmul,popcnt") +#endif + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/bitmask.h: #include "simdjson/westmere/bitmask.h" */ +/* begin file simdjson/westmere/bitmask.h */ +#ifndef SIMDJSON_WESTMERE_BITMASK_H +#define SIMDJSON_WESTMERE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processing supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMASK_H +/* end file simdjson/westmere/bitmask.h */ +/* including simdjson/westmere/numberparsing_defs.h: #include "simdjson/westmere/numberparsing_defs.h" */ +/* begin file simdjson/westmere/numberparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +#define SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H + +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). + */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace westmere +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +/* end file simdjson/westmere/numberparsing_defs.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ +/* including simdjson/westmere/stringparsing_defs.h: #include "simdjson/westmere/stringparsing_defs.h" */ +/* begin file simdjson/westmere/stringparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +#define SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); } -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif } -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { - // - // Check for minus sign - // - if(src == src_end) { return NUMBER_ERROR; } - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); +} // unnamed namespace +} // namespace westmere +} // namespace simdjson - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += uint8_t(negative) + 1; +namespace simdjson { +namespace westmere { +namespace { +namespace simd { - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = src; - uint64_t i = 0; - while (parse_digit(*src, i)) { src++; } + template + struct base { + __m128i value; - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(src - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*src)) { - // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(*src != '"') { return NUMBER_ERROR; } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} + // Zero constructor + simdjson_inline base() : value{__m128i()} {} -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += uint8_t(negative); + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = p-start_digits > 19; + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); } - } else { - overflow = p-src > 19; - } + }; - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} - exponent += exp_neg ? 0-exp : exp; - } + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), &d)) { - return NUMBER_ERROR; - } - return d; -} + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } -simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { - return (*src == '-'); -} + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } -simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += uint8_t(negative); - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } - return false; -} + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } -simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += uint8_t(negative); - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { - // We have an integer. - // If the number is negative and valid, it must be a signed integer. - if(negative) { return number_type::signed_integer; } - // We want values larger or equal to 9223372036854775808 to be unsigned - // integers, and the other values to be signed integers. - int digit_count = int(p - src); - if(digit_count >= 19) { - const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); - if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { - return number_type::unsigned_integer; - } + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); } - return number_type::signed_integer; - } - // Hopefully, we have 'e' or 'E' or '.'. - return number_type::floating_point_number; -} -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { - if(src == src_end) { return NUMBER_ERROR; } - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += uint8_t(negative); + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - if(p == src_end) { return NUMBER_ERROR; } - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while ((p != src_end) && parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely((p != src_end) && (*p == '.'))) { - p++; - const uint8_t *start_decimal_digits = p; - if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); } - } else { - overflow = p-src > 19; - } - // - // Parse the exponent - // - if ((p != src_end) && (*p == 'e' || *p == 'E')) { - p++; - if(p == src_end) { return NUMBER_ERROR; } - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while ((p != src_end) && parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; - exponent += exp_neg ? 0-exp : exp; - } + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; - if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { - return NUMBER_ERROR; - } - return d; -} + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } -simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += uint8_t(negative) + 1; + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = p-start_digits > 19; + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); } - } else { - overflow = p-src > 19; - } - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 - exponent += exp_neg ? 0-exp : exp; - } +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson - if (*p != '"') { return NUMBER_ERROR; } +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; +namespace simdjson { +namespace westmere { +namespace { - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), &d)) { - return NUMBER_ERROR; - } - return d; -} +using namespace simd; -} // unnamed namespace -#endif // SIMDJSON_SKIPNUMBERPARSING +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); -} // namespace numberparsing + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } -inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { - switch (type) { - case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; - case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; - case number_type::floating_point_number: out << "floating-point number (binary64)"; break; - default: SIMDJSON_UNREACHABLE(); - } - return out; + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + 16); + v0.store(dst); + v1.store(dst + 16); + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; } -} // namespace icelake +} // unnamed namespace +} // namespace westmere } // namespace simdjson -#endif // SIMDJSON_GENERIC_NUMBERPARSING_H -/* end file simdjson/generic/numberparsing.h for icelake */ +#endif // SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +/* end file simdjson/westmere/stringparsing_defs.h */ +/* end file simdjson/westmere/begin.h */ +/* including generic/amalgamated.h for westmere: #include */ +/* begin file generic/amalgamated.h for westmere */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) +#error generic/dependencies.h must be included before generic/amalgamated.h! +#endif -/* including simdjson/generic/implementation_simdjson_result_base-inl.h for icelake: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for icelake */ -#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* including generic/base.h for westmere: #include */ +/* begin file generic/base.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { - -// -// internal::implementation_simdjson_result_base inline implementation -// +namespace westmere { +namespace { -template -simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { - error = this->second; - if (!error) { - value = std::forward>(*this).first; - } -} +struct json_character_block; -template -simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { - error_code error; - std::forward>(*this).tie(value, error); - return error; -} +} // unnamed namespace +} // namespace westmere +} // namespace simdjson -template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { - return this->second; -} +#endif // SIMDJSON_SRC_GENERIC_BASE_H +/* end file generic/base.h for westmere */ +/* including generic/dom_parser_implementation.h for westmere: #include */ +/* begin file generic/dom_parser_implementation.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H -#if SIMDJSON_EXCEPTIONS +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -template -simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return this->first; -} +// Interface a dom parser implementation must fulfill +namespace simdjson { +namespace westmere { +namespace { -template -simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { - return std::forward>(*this).take_value(); -} +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); +simdjson_inline bool is_ascii(const simd8x64& input); -template -simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(this->first); -} +} // unnamed namespace +} // namespace westmere +} // namespace simdjson -template -simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { - return std::forward>(*this).take_value(); -} +#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file generic/dom_parser_implementation.h for westmere */ +/* including generic/json_character_block.h for westmere: #include */ +/* begin file generic/json_character_block.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H -#endif // SIMDJSON_EXCEPTIONS +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -template -simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { - return this->first; -} +namespace simdjson { +namespace westmere { +namespace { -template -simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { - return this->first; -} +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); -template -simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { - return std::forward(this->first); -} + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept - : first{std::forward(value)}, second{error} {} -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept - : implementation_simdjson_result_base(T{}, error) {} -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept - : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + uint64_t _whitespace; + uint64_t _op; +}; -} // namespace icelake +} // unnamed namespace +} // namespace westmere } // namespace simdjson -#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H -/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for icelake */ -/* end file simdjson/generic/amalgamated.h for icelake */ -/* including simdjson/icelake/end.h: #include "simdjson/icelake/end.h" */ -/* begin file simdjson/icelake/end.h */ +#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +/* end file generic/json_character_block.h for westmere */ +/* end file generic/amalgamated.h for westmere */ +/* including generic/stage1/amalgamated.h for westmere: #include */ +/* begin file generic/stage1/amalgamated.h for westmere */ +// Stuff other things depend on +/* including generic/stage1/base.h for westmere: #include */ +/* begin file generic/stage1/base.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H + /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE -SIMDJSON_UNTARGET_REGION -#endif +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { -/* undefining SIMDJSON_IMPLEMENTATION from "icelake" */ -#undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/icelake/end.h */ +class bit_indexer; +template +struct buf_block_reader; +struct json_block; +class json_minifier; +class json_scanner; +struct json_string_block; +class json_string_scanner; +class json_structural_indexer; -#endif // SIMDJSON_ICELAKE_H -/* end file simdjson/icelake.h */ -/* including simdjson/icelake/implementation.h: #include */ -/* begin file simdjson/icelake/implementation.h */ -#ifndef SIMDJSON_ICELAKE_IMPLEMENTATION_H -#define SIMDJSON_ICELAKE_IMPLEMENTATION_H +} // namespace stage1 + +namespace utf8_validation { +struct utf8_checker; +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H +/* end file generic/stage1/base.h for westmere */ +/* including generic/stage1/buf_block_reader.h for westmere: #include */ +/* begin file generic/stage1/buf_block_reader.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +#include + namespace simdjson { -namespace icelake { +namespace westmere { +namespace { +namespace stage1 { -/** - * @private - */ -class implementation final : public simdjson::implementation { +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { public: - simdjson_inline implementation() : simdjson::implementation( - "icelake", - "Intel/AMD AVX512", - internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512F | internal::instruction_set::AVX512DQ | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 - ) {} - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t capacity, - size_t max_length, - std::unique_ptr& dst - ) const noexcept final; - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; - simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; }; -} // namespace icelake -} // namespace simdjson +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} -#endif // SIMDJSON_ICELAKE_IMPLEMENTATION_H -/* end file simdjson/icelake/implementation.h */ +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] <= ' ') { buf[i] = '_'; } + if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} -// defining SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER allows us to provide our own bit_indexer::write -#define SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } -/* including simdjson/icelake/begin.h: #include */ -/* begin file simdjson/icelake/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "icelake" */ -#define SIMDJSON_IMPLEMENTATION icelake -/* including simdjson/icelake/base.h: #include "simdjson/icelake/base.h" */ -/* begin file simdjson/icelake/base.h */ -#ifndef SIMDJSON_ICELAKE_BASE_H -#define SIMDJSON_ICELAKE_BASE_H +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE -namespace simdjson { -/** - * Implementation for Icelake (Intel AVX512). - */ -namespace icelake { +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} -class implementation; +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} -} // namespace icelake +} // namespace stage1 +} // unnamed namespace +} // namespace westmere } // namespace simdjson -#endif // SIMDJSON_ICELAKE_BASE_H -/* end file simdjson/icelake/base.h */ -/* including simdjson/icelake/intrinsics.h: #include "simdjson/icelake/intrinsics.h" */ -/* begin file simdjson/icelake/intrinsics.h */ -#ifndef SIMDJSON_ICELAKE_INTRINSICS_H -#define SIMDJSON_ICELAKE_INTRINSICS_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +/* end file generic/stage1/buf_block_reader.h for westmere */ +/* including generic/stage1/json_escape_scanner.h for westmere: #include */ +/* begin file generic/stage1/json_escape_scanner.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#if SIMDJSON_VISUAL_STUDIO -// under clang within visual studio, this will include -#include // visual studio or clang -#else -#include // elsewhere -#endif // SIMDJSON_VISUAL_STUDIO +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { -#if SIMDJSON_CLANG_VISUAL_STUDIO /** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - * e.g., if __AVX2__ is set... in turn, we normally set these - * macros by compiling against the corresponding architecture - * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole - * software with these advanced instructions. In simdjson, we - * want to compile the whole program for a generic target, - * and only target our specific kernels. As a workaround, - * we directly include the needed headers. These headers would - * normally guard against such usage, but we carefully included - * (or ) before, so the headers - * are fooled. + * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). */ -#include // for _blsr_u64 -#include // for __lzcnt64 -#include // for most things (AVX2, AVX512, _popcnt64) -#include -#include -#include -#include -#include // for _mm_clmulepi64_si128 -// Important: we need the AVX-512 headers: -#include -#include -#include -#include -#include -#include -#include -// unfortunately, we may not get _blsr_u64, but, thankfully, clang -// has it as a macro. -#ifndef _blsr_u64 -// we roll our own -#define _blsr_u64(n) ((n - 1) & n) -#endif // _blsr_u64 -#endif // SIMDJSON_CLANG_VISUAL_STUDIO +struct json_escape_scanner { + /** The actual escape characters (the backslashes themselves). */ + uint64_t next_is_escaped = 0ULL; -static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); + struct escaped_and_escape { + /** + * Mask of escaped characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 0100100010100101000 + * n \ \ n \ \ + * ``` + */ + uint64_t escaped; + /** + * Mask of escape characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 1001000101001010001 + * \ \ \ \ \ \ \ + * ``` + */ + uint64_t escape; + }; -#endif // SIMDJSON_ICELAKE_INTRINSICS_H -/* end file simdjson/icelake/intrinsics.h */ + /** + * Get a mask of both escape and escaped characters (the characters following a backslash). + * + * @param potential_escape A mask of the character that can escape others (but could be + * escaped itself). e.g. block.eq('\\') + */ + simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { -#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE -SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt,popcnt") +#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT + if (!backslash) { return {next_escaped_without_backslashes(), 0}; } #endif -/* including simdjson/icelake/bitmanipulation.h: #include "simdjson/icelake/bitmanipulation.h" */ -/* begin file simdjson/icelake/bitmanipulation.h */ -#ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H -#define SIMDJSON_ICELAKE_BITMANIPULATION_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // | | Mask (shows characters instead of 1's) | Depth | Instructions | + // |--------------------------------|----------------------------------------|-------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | + // | | ` even odd even odd odd` | | | + // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) + // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) + // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) + // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () + // (*) this is not needed until the next iteration + uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); + uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); + uint64_t escape = escape_and_terminal_code & backslash; + this->next_is_escaped = escape >> 63; + return {escaped, escape}; + } -namespace simdjson { -namespace icelake { -namespace { +private: + static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -SIMDJSON_NO_SANITIZE_UNDEFINED -// This function can be used safely even if not all bytes have been -// initialized. -// See issue https://github.com/simdjson/simdjson/issues/1965 -SIMDJSON_NO_SANITIZE_MEMORY -simdjson_inline int trailing_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - return (int)_tzcnt_u64(input_num); -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - //////// - // You might expect the next line to be equivalent to - // return (int)_tzcnt_u64(input_num); - // but the generated code differs and might be less efficient? - //////// - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO -} + simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { + uint64_t escaped = this->next_is_escaped; + this->next_is_escaped = 0; + return escaped; + } -/* result might be undefined when input_num is zero */ -simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return _blsr_u64(input_num); -} + /** + * Returns a mask of the next escape characters (masking out escaped backslashes), along with + * any non-backslash escape codes. + * + * \n \\n \\\n \\\\n returns: + * \n \ \ \n \ \ + * 11 100 1011 10100 + * + * You are expected to mask out the first bit yourself if the previous block had a trailing + * escape. + * + * & the result with potential_escape to get just the escape characters. + * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. + */ + static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { + // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: + // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be + // inverted (\\\ would be 010 instead of 101). + // + // ``` + // string: | ____\\\\_\\\\_____ | + // maybe_escaped | ODD | \ \ \ \ | + // even-aligned ^^^ ^^^^ odd-aligned + // ``` + // + // Taking that into account, our basic strategy is: + // + // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for + // odd-aligned runs. + // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the + // odd bits in odd-aligned runs. + // 3. & with backslash to clean up any stray bits. + // runs are set to 0, and then XORing with "odd": + // + // | | Mask (shows characters instead of 1's) | Instructions | + // |--------------------------------|----------------------------------------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | + // | | ` even odd even odd odd` | + // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) + // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) + // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) + // -/* result might be undefined when input_num is zero */ -simdjson_inline int leading_zeroes(uint64_t input_num) { - return int(_lzcnt_u64(input_num)); -} + // Escaped characters are characters following an escape. + uint64_t maybe_escaped = potential_escape << 1; -#if SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows - return __popcnt64(input_num);// Visual Studio wants two underscores -} -#else -simdjson_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); -} -#endif + // To distinguish odd from even escape sequences, therefore, we turn on any *starting* + // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) + // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. + // - Odd runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. + // - All other odd bytes are 1, and even bytes are 0. + uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; + uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; -simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); -#else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); -#endif -} + // Now we flip all odd bytes back with xor. This: + // - Makes odd runs of backslashes go from 0000 to 1010 + // - Makes even runs of backslashes go from 1111 to 1010 + // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) + // - Resets all other bytes to 0 + return even_series_codes_and_odd_bits ^ ODD_BITS; + } +}; +} // namespace stage1 } // unnamed namespace -} // namespace icelake +} // namespace westmere } // namespace simdjson -#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H -/* end file simdjson/icelake/bitmanipulation.h */ -/* including simdjson/icelake/bitmask.h: #include "simdjson/icelake/bitmask.h" */ -/* begin file simdjson/icelake/bitmask.h */ -#ifndef SIMDJSON_ICELAKE_BITMASK_H -#define SIMDJSON_ICELAKE_BITMASK_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_escape_scanner.h for westmere */ +/* including generic/stage1/json_string_scanner.h for westmere: #include */ +/* begin file generic/stage1/json_string_scanner.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : + _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-escaped ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Scans for escape characters + json_escape_scanner escape_scanner{}; + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; +}; // -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// Return a mask of all string characters plus end quotes. // -// For example, prefix_xor(00100100) == 00011100 +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. // -simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { - // There should be no such thing with a processor supporting avx2 - // but not clmul. - __m128i all_ones = _mm_set1_epi8('\xFF'); - __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); - return _mm_cvtsi128_si64(result); -} +// Backslash sequences outside of quotes will be detected in stage 2. +// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = escape_scanner.next(backslash).escaped; + const uint64_t quote = in.eq('"') & ~escaped; -} // unnamed namespace -} // namespace icelake -} // namespace simdjson + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; -#endif // SIMDJSON_ICELAKE_BITMASK_H -/* end file simdjson/icelake/bitmask.h */ -/* including simdjson/icelake/simd.h: #include "simdjson/icelake/simd.h" */ -/* begin file simdjson/icelake/simd.h */ -#ifndef SIMDJSON_ICELAKE_SIMD_H -#define SIMDJSON_ICELAKE_SIMD_H + // + // Check if we're still in a string at the end of the box so the next block will know + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Use ^ to turn the beginning quote off, and the end quote on. -#if defined(__GNUC__) && !defined(__clang__) -#if __GNUC__ == 8 -#define SIMDJSON_GCC8 1 -#endif // __GNUC__ == 8 -#endif // defined(__GNUC__) && !defined(__clang__) + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block(escaped, quote, in_string); +} -#if SIMDJSON_GCC8 -/** - * GCC 8 fails to provide _mm512_set_epi8. We roll our own. - */ -inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { - return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), - uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), - uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), - uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), - uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), - uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), - uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), - uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; } -#endif // SIMDJSON_GCC8 +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_string_scanner.h for westmere */ +/* including generic/stage1/utf8_lookup4_algorithm.h for westmere: #include */ +/* begin file generic/stage1/utf8_lookup4_algorithm.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace { -namespace simd { +namespace utf8_validation { - // Forward-declared so they can be used by splat and friends. - template - struct base { - __m512i value; +using namespace simd; - // Zero constructor - simdjson_inline base() : value{__m512i()} {} + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ - // Conversion from SIMD register - simdjson_inline base(const __m512i _value) : value(_value) {} + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, - // Conversion to SIMD register - simdjson_inline operator const __m512i&() const { return this->value; } - simdjson_inline operator __m512i&() { return this->value; } + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - // Bit operations - simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } - simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } - simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } - simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } - simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } - }; + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - // Forward-declared so they can be used by splat and friends. - template - struct simd8; + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = must_be_2_3_continuation(prev2, prev3); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } - template> - struct base8: base> { - typedef uint32_t bitmask_t; - typedef uint64_t bitmask2_t; + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } - simdjson_inline base8() : base>() {} - simdjson_inline base8(const __m512i _value) : base>(_value) {} + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; - friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { - return _mm512_cmpeq_epi8_mask(lhs, rhs); + // + // Check whether the current bytes are valid UTF-8. + // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); } - static const int SIZE = sizeof(base::value); - - template - simdjson_inline simd8 prev(const simd8 prev_chunk) const { - // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) - constexpr int shift = 16 - N; - return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. + simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; } - }; - - // SIMD byte mask type (returned by things like eq and gt) - template<> - struct simd8: base8 { - static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } - - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m512i _value) : base8(_value) {} - // Splat constructor - simdjson_inline simd8(bool _value) : base8(splat(_value)) {} - simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } - simdjson_inline simd8 operator~() const { return *this ^ true; } - }; - template - struct base8_numeric: base8 { - static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } - static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } - static simdjson_inline simd8 load(const T values[64]) { - return _mm512_loadu_si512(reinterpret_cast(values)); + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_inline simd8 repeat_16( - T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; } - simdjson_inline base8_numeric() : base8() {} - simdjson_inline base8_numeric(const __m512i _value) : base8(_value) {} - - // Store to array - simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } - - // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } - simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } - - // Override to distinguish from bool version - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + }; // struct utf8_checker +} // namespace utf8_validation - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) - template - simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm512_shuffle_epi8(lookup_table, *this); - } +} // unnamed namespace +} // namespace westmere +} // namespace simdjson - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). - // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes - // get written. - // Design consideration: it seems like a function with the - // signature simd8 compress(uint32_t mask) would be - // sensible, but the AVX ISA makes this kind of approach difficult. - template - simdjson_inline void compress(uint64_t mask, L * output) const { - _mm512_mask_compressstoreu_epi8 (output,~mask,*this); - } +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +/* end file generic/stage1/utf8_lookup4_algorithm.h for westmere */ +/* including generic/stage1/json_scanner.h for westmere: #include */ +/* begin file generic/stage1/json_scanner.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H - template - simdjson_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, replace15 - )); - } - }; +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // Signed bytes - template<> - struct simd8 : base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, - int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, - int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, - int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, - int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, - int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, - int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 - ) : simd8(_mm512_set_epi8( - v63, v62, v61, v60, v59, v58, v57, v56, - v55, v54, v53, v52, v51, v50, v49, v48, - v47, v46, v45, v44, v43, v42, v41, v40, - v39, v38, v37, v36, v35, v34, v33, v32, - v31, v30, v29, v28, v27, v26, v25, v24, - v23, v22, v21, v20, v19, v18, v17, v16, - v15, v14, v13, v12, v11, v10, v9, v8, - v7, v6, v5, v4, v3, v2, v1, v0 - )) {} +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. + */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - // Order-sensitive comparisons - simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } - simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } - simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } - }; + // Helpers - // Unsigned bytes - template<> - struct simd8: base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, - uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, - uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, - uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, - uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, - uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, - uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 - ) : simd8(_mm512_set_epi8( - v63, v62, v61, v60, v59, v58, v57, v56, - v55, v54, v53, v52, v51, v50, v49, v48, - v47, v46, v45, v44, v43, v42, v41, v40, - v39, v38, v37, v36, v35, v34, v33, v32, - v31, v30, v29, v28, v27, v26, v25, v24, - v23, v22, v21, v20, v19, v18, v17, v16, - v15, v14, v13, v12, v11, v10, v9, v8, - v7, v6, v5, v4, v3, v2, v1, v0 - )) {} + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) - // Saturated math - simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } - simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. + */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; - // Order-specific operations - simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); - // Bit-specific operations - simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } - simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; - simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } - simdjson_inline bool bits_not_set_anywhere() const { - return !_mm512_test_epi8_mask(*this, *this); - } - simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } - simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } - template - simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } - template - simdjson_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } - // Get one of the bits and make a bitmask out of it. - // e.g. value.get_bit<7>() gets the high bit - template - simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } - }; - template - struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} - simd8x64(const simd8x64& o) = delete; // no copy allowed - simd8x64& operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. + characters, + follows_nonquote_scalar + ); +} - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} - simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} - simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} - simdjson_inline uint64_t compress(uint64_t mask, T * output) const { - this->chunks[0].compress(mask, output); - return 64 - count_ones(mask); - } +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson - simdjson_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr+sizeof(simd8)*0); - } +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +/* end file generic/stage1/json_scanner.h for westmere */ - simdjson_inline simd8 reduce_or() const { - return this->chunks[0]; - } +// All other declarations +/* including generic/stage1/find_next_document_index.h for westmere: #include */ +/* begin file generic/stage1/find_next_document_index.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H - simdjson_inline simd8x64 bit_or(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] | mask - ); - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - simdjson_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return this->chunks[0] == mask; - } +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { - simdjson_inline uint64_t eq(const simd8x64 &other) const { - return this->chunks[0] == other.chunks[0]; +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; } - - simdjson_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return this->chunks[0] <= mask; + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; } - }; // struct simd8x64 - -} // namespace simd + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} +} // namespace stage1 } // unnamed namespace -} // namespace icelake +} // namespace westmere } // namespace simdjson -#endif // SIMDJSON_ICELAKE_SIMD_H -/* end file simdjson/icelake/simd.h */ -/* including simdjson/icelake/stringparsing_defs.h: #include "simdjson/icelake/stringparsing_defs.h" */ -/* begin file simdjson/icelake/stringparsing_defs.h */ -#ifndef SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H -#define SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for westmere */ +/* including generic/stage1/json_minifier.h for westmere: #include */ +/* begin file generic/stage1/json_minifier.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/simd.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + namespace simdjson { -namespace icelake { +namespace westmere { namespace { +namespace stage1 { -using namespace simd; - -// Holds backslashes and quotes locations. -struct backslash_and_quote { +class json_minifier { public: - static constexpr uint32_t BYTES_PROCESSED = 64; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; - simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } - simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; +}; - uint64_t bs_bits; - uint64_t quote_bits; -}; // struct backslash_and_quote +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} -simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // this can read up to 15 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); - simd8 v(src); - // store to dest unconditionally - we can overwrite the bits we don't like later - v.store(dst); - return { - static_cast(v == '\\'), // bs_bits - static_cast(v == '"'), // quote_bits - }; +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} + +template<> +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); } +} // namespace stage1 } // unnamed namespace -} // namespace icelake +} // namespace westmere } // namespace simdjson -#endif // SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H -/* end file simdjson/icelake/stringparsing_defs.h */ -/* including simdjson/icelake/numberparsing_defs.h: #include "simdjson/icelake/numberparsing_defs.h" */ -/* begin file simdjson/icelake/numberparsing_defs.h */ -#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H -#define SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +/* end file generic/stage1/json_minifier.h for westmere */ +/* including generic/stage1/json_structural_indexer.h for westmere: #include */ +/* begin file generic/stage1/json_structural_indexer.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + namespace simdjson { -namespace icelake { -namespace numberparsing { +namespace westmere { +namespace { +namespace stage1 { -static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - // this actually computes *16* values so we are being wasteful. - const __m128i ascii0 = _mm_set1_epi8('0'); - const __m128i mul_1_10 = - _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); - const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); - const __m128i mul_1_10000 = - _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); - const __m128i input = _mm_sub_epi8( - _mm_loadu_si128(reinterpret_cast(chars)), ascii0); - const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); - const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); - const __m128i t3 = _mm_packus_epi32(t2, t2); - const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); - return _mm_cvtsi128_si32( - t4); // only captures the sum of the first 8 digits, drop the rest -} +class bit_indexer { +public: + uint32_t *tail; -/** @private */ -simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { - internal::value128 answer; -#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + +#if SIMDJSON_PREFER_REVERSE_BITS + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. + * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + simdjson_inline void write_index(uint32_t idx, uint64_t& rev_bits, int i) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } #else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ + + simdjson_inline void write_index(uint32_t idx, uint64_t& bits, int i) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } +#endif // SIMDJSON_PREFER_REVERSE_BITS + + template + simdjson_inline int write_indexes(uint32_t idx, uint64_t& bits) { + write_index(idx, bits, START); + SIMDJSON_IF_CONSTEXPR (N > 1) { + write_indexes<(N-1>0?START+1:START), (N-1>=0?N-1:1)>(idx, bits); + } + return START+N; + } + + template + simdjson_inline int write_indexes_stepped(uint32_t idx, uint64_t& bits, int cnt) { + write_indexes(idx, bits); + SIMDJSON_IF_CONSTEXPR ((START+STEP) < END) { + if (simdjson_unlikely((START+STEP) < cnt)) { + write_indexes_stepped<(START+STEP(idx, bits, cnt); + } + } + return ((END-START) % STEP) == 0 ? END : (END-START) - ((END-START) % STEP) + STEP; + } + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it + // will provide its own version of the code. +#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; + + int cnt = static_cast(count_ones(bits)); + +#if SIMDJSON_PREFER_REVERSE_BITS + bits = reverse_bits(bits); #endif - return answer; +#ifdef SIMDJSON_STRUCTURAL_INDEXER_STEP + static constexpr const int STEP = SIMDJSON_STRUCTURAL_INDEXER_STEP; +#else + static constexpr const int STEP = 4; +#endif + static constexpr const int STEP_UNTIL = 24; + + write_indexes_stepped<0, STEP_UNTIL, STEP>(idx, bits, cnt); + SIMDJSON_IF_CONSTEXPR (STEP_UNTIL < 64) { + if (simdjson_unlikely(STEP_UNTIL < cnt)) { + for (int i=STEP_UNTIL; itail += cnt; + } +#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. + */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 does not use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) + uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); } -} // namespace numberparsing -} // namespace icelake -} // namespace simdjson +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} -#define SIMDJSON_SWAR_NUMBER_PARSING 1 +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} -#endif // SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H -/* end file simdjson/icelake/numberparsing_defs.h */ -/* end file simdjson/icelake/begin.h */ -/* including generic/amalgamated.h for icelake: #include */ -/* begin file generic/amalgamated.h for icelake */ -#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) -#error generic/dependencies.h must be included before generic/amalgamated.h! +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION + checker.check_next_input(in); #endif + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} -/* including generic/base.h for icelake: #include */ -/* begin file generic/base.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_BASE_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? + ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } -namespace simdjson { -namespace icelake { -namespace { + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On-Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. + * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } -struct json_character_block; + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} +} // namespace stage1 } // unnamed namespace -} // namespace icelake +} // namespace westmere } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_BASE_H -/* end file generic/base.h for icelake */ -/* including generic/dom_parser_implementation.h for icelake: #include */ -/* begin file generic/dom_parser_implementation.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -// Interface a dom parser implementation must fulfill -namespace simdjson { -namespace icelake { -namespace { - -simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); -simdjson_inline bool is_ascii(const simd8x64& input); - -} // unnamed namespace -} // namespace icelake -} // namespace simdjson +// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. +#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER -#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H -/* end file generic/dom_parser_implementation.h for icelake */ -/* including generic/json_character_block.h for icelake: #include */ -/* begin file generic/json_character_block.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +/* end file generic/stage1/json_structural_indexer.h for westmere */ +/* including generic/stage1/utf8_validator.h for westmere: #include */ +/* begin file generic/stage1/utf8_validator.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace { +namespace stage1 { -struct json_character_block { - static simdjson_inline json_character_block classify(const simd::simd8x64& in); - - simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } - simdjson_inline uint64_t op() const noexcept { return _op; } - simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} - uint64_t _whitespace; - uint64_t _op; -}; +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} +} // namespace stage1 } // unnamed namespace -} // namespace icelake +} // namespace westmere } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H -/* end file generic/json_character_block.h for icelake */ -/* end file generic/amalgamated.h for icelake */ -/* including generic/stage1/amalgamated.h for icelake: #include */ -/* begin file generic/stage1/amalgamated.h for icelake */ +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +/* end file generic/stage1/utf8_validator.h for westmere */ +/* end file generic/stage1/amalgamated.h for westmere */ +/* including generic/stage2/amalgamated.h for westmere: #include */ +/* begin file generic/stage2/amalgamated.h for westmere */ // Stuff other things depend on -/* including generic/stage1/base.h for icelake: #include */ -/* begin file generic/stage1/base.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H +/* including generic/stage2/base.h for westmere: #include */ +/* begin file generic/stage2/base.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ /* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace { -namespace stage1 { - -class bit_indexer; -template -struct buf_block_reader; -struct json_block; -class json_minifier; -class json_scanner; -struct json_string_block; -class json_string_scanner; -class json_structural_indexer; - -} // namespace stage1 - -namespace utf8_validation { -struct utf8_checker; -} // namespace utf8_validation +namespace stage2 { -using utf8_validation::utf8_checker; +class json_iterator; +class structural_iterator; +struct tape_builder; +struct tape_writer; +} // namespace stage2 } // unnamed namespace -} // namespace icelake +} // namespace westmere } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H -/* end file generic/stage1/base.h for icelake */ -/* including generic/stage1/buf_block_reader.h for icelake: #include */ -/* begin file generic/stage1/buf_block_reader.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +/* end file generic/stage2/base.h for westmere */ +/* including generic/stage2/tape_writer.h for westmere: #include */ +/* begin file generic/stage2/tape_writer.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include namespace simdjson { -namespace icelake { +namespace westmere { namespace { -namespace stage1 { - -// Walks through a buffer in block-sized increments, loading the last part with spaces -template -struct buf_block_reader { -public: - simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdjson_inline size_t block_index(); - simdjson_inline bool has_full_block() const; - simdjson_inline const uint8_t *full_block() const; - /** - * Get the last block, padded with spaces. - * - * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this - * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there - * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. - * - * @return the number of effective characters in the last block. - */ - simdjson_inline size_t get_remainder(uint8_t *dst) const; - simdjson_inline void advance(); -private: - const uint8_t *buf; - const size_t len; - const size_t lenminusstep; - size_t idx; -}; - -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text_64(const uint8_t *text) { - static char buf[sizeof(simd8x64) + 1]; - for (size_t i=0; i); i++) { - buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text(const simd8x64& in) { - static char buf[sizeof(simd8x64) + 1]; - in.store(reinterpret_cast(buf)); - for (size_t i=0; i); i++) { - if (buf[i] < ' ') { buf[i] = '_'; } - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { - static char buf[sizeof(simd8x64) + 1]; - in.store(reinterpret_cast(buf)); - for (size_t i=0; i); i++) { - if (buf[i] <= ' ') { buf[i] = '_'; } - if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -simdjson_unused static char * format_mask(uint64_t mask) { - static char buf[sizeof(simd8x64) + 1]; - for (size_t i=0; i<64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; - } - buf[64] = '\0'; - return buf; -} - -template -simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} - -template -simdjson_inline size_t buf_block_reader::block_index() { return idx; } - -template -simdjson_inline bool buf_block_reader::has_full_block() const { - return idx < lenminusstep; -} - -template -simdjson_inline const uint8_t *buf_block_reader::full_block() const { - return &buf[idx]; -} +namespace stage2 { -template -simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { - if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. - std::memcpy(dst, buf + idx, len - idx); - return len - idx; -} +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; -template -simdjson_inline void buf_block_reader::advance() { - idx += STEP_SIZE; -} + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; -} // namespace stage1 -} // unnamed namespace -} // namespace icelake -} // namespace simdjson + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; -#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H -/* end file generic/stage1/buf_block_reader.h for icelake */ -/* including generic/stage1/json_escape_scanner.h for icelake: #include */ -/* begin file generic/stage1/json_escape_scanner.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; -namespace simdjson { -namespace icelake { -namespace { -namespace stage1 { + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; -/** - * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). - */ -struct json_escape_scanner { - /** The actual escape characters (the backslashes themselves). */ - uint64_t next_is_escaped = 0ULL; + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; - struct escaped_and_escape { - /** - * Mask of escaped characters. - * - * ``` - * \n \\n \\\n \\\\n \ - * 0100100010100101000 - * n \ \ n \ \ - * ``` - */ - uint64_t escaped; - /** - * Mask of escape characters. - * - * ``` - * \n \\n \\\n \\\\n \ - * 1001000101001010001 - * \ \ \ \ \ \ \ - * ``` - */ - uint64_t escape; - }; + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; /** - * Get a mask of both escape and escaped characters (the characters following a backslash). + * Write a value to a known location on tape. * - * @param potential_escape A mask of the character that can escape others (but could be - * escaped itself). e.g. block.eq('\\') + * Used to go back and write out the start of a container after the container ends. */ - simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; -#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT - if (!backslash) { return {next_escaped_without_backslashes(), 0}; } -#endif +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer - // | | Mask (shows characters instead of 1's) | Depth | Instructions | - // |--------------------------------|----------------------------------------|-------|---------------------| - // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | - // | | ` even odd even odd odd` | | | - // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) - // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) - // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) - // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) - // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () - // (*) this is not needed until the next iteration - uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); - uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); - uint64_t escape = escape_and_terminal_code & backslash; - this->next_is_escaped = escape >> 63; - return {escaped, escape}; - } +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. */ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} -private: - static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} - simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { - uint64_t escaped = this->next_is_escaped; - this->next_is_escaped = 0; - return escaped; - } +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} - /** - * Returns a mask of the next escape characters (masking out escaped backslashes), along with - * any non-backslash escape codes. - * - * \n \\n \\\n \\\\n returns: - * \n \ \ \n \ \ - * 11 100 1011 10100 - * - * You are expected to mask out the first bit yourself if the previous block had a trailing - * escape. - * - * & the result with potential_escape to get just the escape characters. - * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. - */ - static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { - // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: - // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be - // inverted (\\\ would be 010 instead of 101). - // - // ``` - // string: | ____\\\\_\\\\_____ | - // maybe_escaped | ODD | \ \ \ \ | - // even-aligned ^^^ ^^^^ odd-aligned - // ``` - // - // Taking that into account, our basic strategy is: - // - // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for - // odd-aligned runs. - // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the - // odd bits in odd-aligned runs. - // 3. & with backslash to clean up any stray bits. - // runs are set to 0, and then XORing with "odd": - // - // | | Mask (shows characters instead of 1's) | Instructions | - // |--------------------------------|----------------------------------------|---------------------| - // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | - // | | ` even odd even odd odd` | - // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) - // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) - // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) - // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) - // +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} - // Escaped characters are characters following an escape. - uint64_t maybe_escaped = potential_escape << 1; +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} - // To distinguish odd from even escape sequences, therefore, we turn on any *starting* - // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) - // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. - // - Odd runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. - // - All other odd bytes are 1, and even bytes are 0. - uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; - uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} - // Now we flip all odd bytes back with xor. This: - // - Makes odd runs of backslashes go from 0000 to 1010 - // - Makes even runs of backslashes go from 1111 to 1010 - // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) - // - Resets all other bytes to 0 - return even_series_codes_and_odd_bits ^ ODD_BITS; - } -}; +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} -} // namespace stage1 +} // namespace stage2 } // unnamed namespace -} // namespace icelake +} // namespace westmere } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H -/* end file generic/stage1/json_escape_scanner.h for icelake */ -/* including generic/stage1/json_string_scanner.h for icelake: #include */ -/* begin file generic/stage1/json_string_scanner.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for westmere */ +/* including generic/stage2/logger.h for westmere: #include */ +/* begin file generic/stage2/logger.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -namespace simdjson { -namespace icelake { -namespace { -namespace stage1 { - -struct json_string_block { - // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : - _escaped(escaped), _quote(quote), _in_string(in_string) {} - - // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } - // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } - // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } - // Tail of string (everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } +#include - // escaped characters (backslashed--does not include the hex characters after \u) - uint64_t _escaped; - // real quotes (non-escaped ones) - uint64_t _quote; - // string characters (includes start quote but not end quote) - uint64_t _in_string; -}; -// Scans blocks for string characters, storing the state necessary to do so -class json_string_scanner { -public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); - // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace westmere { +namespace { +namespace logger { -private: - // Scans for escape characters - json_escape_scanner escape_scanner{}; - // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). - uint64_t prev_in_string = 0ULL; -}; + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; -// -// Return a mask of all string characters plus end quotes. -// -// prev_escaped is overflow saying whether the next character is escaped. -// prev_in_string is overflow saying whether we're still in a string. -// -// Backslash sequences outside of quotes will be detected in stage 2. -// -simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { - const uint64_t backslash = in.eq('\\'); - const uint64_t escaped = escape_scanner.next(backslash).escaped; - const uint64_t quote = in.eq('"') & ~escaped; +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; - // - // prefix_xor flips on bits inside the string (and flips off the end quote). - // - // Then we xor with prev_in_string: if we were in a string already, its effect is flipped - // (characters inside strings are outside, and characters outside strings are inside). - // - const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + static int log_depth; // Not threadsafe. Log only. - // - // Check if we're still in a string at the end of the box so the next block will know - // - prev_in_string = uint64_t(static_cast(in_string) >> 63); + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } - // Use ^ to turn the beginning quote off, and the end quote on. + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } - // We are returning a function-local object so either we get a move constructor - // or we get copy elision. - return json_string_block(escaped, quote, in_string); -} + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } -simdjson_really_inline error_code json_string_scanner::finish() { - if (prev_in_string) { - return UNCLOSED_STRING; + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i */ -/* begin file generic/stage1/utf8_lookup4_algorithm.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +#endif // SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H +/* end file generic/stage2/logger.h for westmere */ + +// All other declarations +/* including generic/stage2/json_iterator.h for westmere: #include */ +/* begin file generic/stage2/json_iterator.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace { -namespace utf8_validation { - -using namespace simd; - - simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { -// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) -// Bit 1 = Too Long (ASCII followed by continuation) -// Bit 2 = Overlong 3-byte -// Bit 4 = Surrogate -// Bit 5 = Overlong 2-byte -// Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1<<6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ +namespace stage2 { - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 - ); - constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, - CARRY, +class json_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + uint32_t depth{0}; - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, + /** + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_end() - at the start `]` of a non-empty object. + * - visit_object_start() - at the end `]` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. + * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. + */ + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000 - ); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT - ); - return (byte_1_high & byte_1_low & byte_2_high); - } - simdjson_inline simd8 check_multibyte_lengths(const simd8 input, - const simd8 prev_input, const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = must_be_2_3_continuation(prev2, prev3); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; - } + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_error(const char *error) const noexcept; - // - // Return nonzero if there are incomplete multibyte characters at the end of the block: - // e.g. if there is a 4-byte character, but it's 3 bytes from the end. - // - simdjson_inline simd8 is_incomplete(const simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): - // ... 1111____ 111_____ 11______ -#if SIMDJSON_IMPLEMENTATION_ICELAKE - static const uint8_t max_array[64] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 - }; -#else - static const uint8_t max_array[32] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 - }; -#endif - const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); - return input.gt_bits(max_value); - } + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; - struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast path) - simd8 prev_incomplete; +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); - // - // Check whether the current bytes are valid UTF-8. - // - simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes - // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); - // The only problem that can happen at EOF is that a multibyte character is too short - // or a byte value too large in the last bytes: check_special_cases only checks for bytes - // too large in the first of two bytes. - simdjson_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't - // possibly finish them. - this->error |= this->prev_incomplete; - } + // + // Read first value + // + { + auto value = advance(); - simdjson_inline void check_next_input(const simd8x64& input) { - if(simdjson_likely(is_ascii(input))) { - this->error |= this->prev_incomplete; - } else { - // you might think that a for-loop would work, but under Visual Studio, it is not good enough. - static_assert((simd8x64::NUM_CHUNKS == 1) - ||(simd8x64::NUM_CHUNKS == 2) - || (simd8x64::NUM_CHUNKS == 4), - "We support one, two or four chunks per 64-byte block."); - SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; } } - // do not forget to call check_eof! - simdjson_inline error_code errors() { - return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; } + } + goto document_end; - }; // struct utf8_checker -} // namespace utf8_validation +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); -} // unnamed namespace -} // namespace icelake -} // namespace simdjson + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } -#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H -/* end file generic/stage1/utf8_lookup4_algorithm.h for icelake */ -/* including generic/stage1/json_scanner.h for icelake: #include */ -/* begin file generic/stage1/json_scanner.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } -namespace simdjson { -namespace icelake { -namespace { -namespace stage1 { +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; -/** - * A block of scanned json, with information on operators and scalars. - * - * We seek to identify pseudo-structural characters. Anything that is inside - * a string must be omitted (hence & ~_string.string_tail()). - * Otherwise, pseudo-structural characters come in two forms. - * 1. We have the structural characters ([,],{,},:, comma). The - * term 'structural character' is from the JSON RFC. - * 2. We have the 'scalar pseudo-structural characters'. - * Scalars are quotes, and any character except structural characters and white space. - * - * To identify the scalar pseudo-structural characters, we must look at what comes - * before them: it must be a space, a quote or a structural characters. - * Starting with simdjson v0.3, we identify them by - * negation: we identify everything that is followed by a non-quote scalar, - * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. - */ -struct json_block { -public: - // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : - _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : - _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); - /** - * The start of structurals. - * In simdjson prior to v0.3, these were called the pseudo-structural characters. - **/ - simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } - /** All JSON whitespace (i.e. not in a string) */ - simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } - // Helpers +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } - /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } - /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); - // string and escape characters - json_string_block _string; - // whitespace, structural characters ('operators'), scalars - json_character_block _characters; - // whether the previous character was a scalar - uint64_t _follows_potential_nonquote_scalar; -private: - // Potential structurals (i.e. disregarding strings) + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); - /** - * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". - * They may reside inside a string. - **/ - simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } - /** - * The start of non-operator runs, like 123, true and "abc". - * It main reside inside a string. - **/ - simdjson_inline uint64_t potential_scalar_start() const noexcept { - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space - // then we know that it is irrelevant structurally. - return _characters.scalar() & ~follows_potential_scalar(); - } - /** - * Whether the given character is immediately after a non-operator like 123, true. - * The characters following a quote are not included. - */ - simdjson_inline uint64_t follows_potential_scalar() const noexcept { - // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character - // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a - // white space. - // It is understood that within quoted region, anything at all could be marked (irrelevant). - return _follows_potential_nonquote_scalar; + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; } -}; - -/** - * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. - * - * The scanner starts by calculating two distinct things: - * - string characters (taking \" into account) - * - structural characters or 'operators' ([]{},:, comma) - * and scalars (runs of non-operators like 123, true and "abc") - * - * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: - * in particular, the operator/scalar bit will find plenty of things that are actually part of - * strings. When we're done, json_block will fuse the two together by masking out tokens that are - * part of a string. - */ -class json_scanner { -public: - json_scanner() = default; - simdjson_inline json_block next(const simd::simd8x64& in); - // Returns either UNCLOSED_STRING or SUCCESS - simdjson_inline error_code finish(); -private: - // Whether the last character of the previous iteration is part of a scalar token - // (anything except whitespace or a structural character/'operator'). - uint64_t prev_scalar = 0ULL; - json_string_scanner string_scanner{}; -}; + return SUCCESS; +} // walk_document() -// -// Check if the current character immediately follows a matching character. -// -// For example, this checks for quotes with backslashes in front of them: -// -// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); -// -simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { - const uint64_t result = match << 1 | overflow; - overflow = match >> 63; - return result; +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { } -simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { - json_string_block strings = string_scanner.next(in); - // identifies the white-space and the structural characters - json_character_block characters = json_character_block::classify(in); - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). - // - // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) - // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential - // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we - // may need to add an extra check when parsing strings. - // - // Performance: there are many ways to skin this cat. - const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); - uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); - // We are returning a function-local object so either we get a move constructor - // or we get copy elision. - return json_block( - strings,// strings is a function-local object so either it moves or the copy is elided. - characters, - follows_nonquote_scalar - ); +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; } - -simdjson_inline error_code json_scanner::finish() { - return string_scanner.finish(); +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); } -} // namespace stage1 -} // unnamed namespace -} // namespace icelake -} // namespace simdjson +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} -#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H -/* end file generic/stage1/json_scanner.h for icelake */ +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} -// All other declarations -/* including generic/stage1/find_next_document_index.h for icelake: #include */ -/* begin file generic/stage1/find_next_document_index.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} -namespace simdjson { -namespace icelake { -namespace { -namespace stage1 { +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} -/** - * This algorithm is used to quickly identify the last structural position that - * makes up a complete document. - * - * It does this by going backwards and finding the last *document boundary* (a - * place where one value follows another without a comma between them). If the - * last document (the characters after the boundary) has an equal number of - * start and end brackets, it is considered complete. - * - * Simply put, we iterate over the structural characters, starting from - * the end. We consider that we found the end of a JSON document when the - * first element of the pair is NOT one of these characters: '{' '[' ':' ',' - * and when the second element is NOT one of these characters: '}' ']' ':' ','. - * - * This simple comparison works most of the time, but it does not cover cases - * where the batch's structural indexes contain a perfect amount of documents. - * In such a case, we do not have access to the structural index which follows - * the last document, therefore, we do not have access to the second element in - * the pair, and that means we cannot identify the last document. To fix this - * issue, we keep a count of the open and closed curly/square braces we found - * while searching for the pair. When we find a pair AND the count of open and - * closed curly/square braces is the same, we know that we just passed a - * complete document, therefore the last json buffer location is the end of the - * batch. - */ -simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { - // Variant: do not count separately, just figure out depth - if(parser.n_structural_indexes == 0) { return 0; } - auto arr_cnt = 0; - auto obj_cnt = 0; - for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { - auto idxb = parser.structural_indexes[i]; - switch (parser.buf[idxb]) { - case ':': - case ',': - continue; - case '}': - obj_cnt--; - continue; - case ']': - arr_cnt--; - continue; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - auto idxa = parser.structural_indexes[i - 1]; - switch (parser.buf[idxa]) { - case '{': - case '[': - case ':': - case ',': - continue; - } - // Last document is complete, so the next document will appear after! - if (!arr_cnt && !obj_cnt) { - return parser.n_structural_indexes; - } - // Last document is incomplete; mark the document at i + 1 as the next one - return i; +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; } - // If we made it to the end, we want to finish counting to see if we have a full document. - switch (parser.buf[parser.structural_indexes[0]]) { - case '}': - obj_cnt--; - break; - case ']': - arr_cnt--; - break; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + // Use the fact that most scalars are going to be either strings or numbers. + if(*value == '"') { + return visitor.visit_string(*this, value); + } else if (((*value - '0') < 10) || (*value == '-')) { + return visitor.visit_number(*this, value); } - if (!arr_cnt && !obj_cnt) { - // We have a complete document. - return parser.n_structural_indexes; + // true, false, null are uncommon. + switch (*value) { + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; } - return 0; } -} // namespace stage1 +} // namespace stage2 } // unnamed namespace -} // namespace icelake +} // namespace westmere } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H -/* end file generic/stage1/find_next_document_index.h for icelake */ -/* including generic/stage1/json_minifier.h for icelake: #include */ -/* begin file generic/stage1/json_minifier.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for westmere */ +/* including generic/stage2/stringparsing.h for westmere: #include */ +/* begin file generic/stage2/stringparsing.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace westmere { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -namespace simdjson { -namespace icelake { -namespace { -namespace stage1 { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; -class json_minifier { -public: - template - static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; -private: - simdjson_inline json_minifier(uint8_t *_dst) - : dst{_dst} - {} - template - simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; - simdjson_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); - json_scanner scanner{}; - uint8_t *dst; -}; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); -simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { - uint64_t mask = block.whitespace(); - dst += in.compress(mask, dst); -} + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } -simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { - error_code error = scanner.finish(); - if (error) { dst_len = 0; return error; } - dst_len = dst - dst_start; - return SUCCESS; + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; } -template<> -simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - simd::simd8x64 in_2(block_buf+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1); - this->next(in_2, block_2); - reader.advance(); -} -template<> -simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - json_block block_1 = scanner.next(in_1); - this->next(block_buf, block_1); - reader.advance(); +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; } -template -error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { - buf_block_reader reader(buf, len); - json_minifier minifier(dst); - // Index the first n-1 blocks - while (reader.has_full_block()) { - minifier.step(reader.full_block(), reader); +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_ptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } } +} - // Index the last (remainder) block, padded with spaces - uint8_t block[STEP_SIZE]; - size_t remaining_bytes = reader.get_remainder(block); - if (remaining_bytes > 0) { - // We do not want to write directly to the output stream. Rather, we write - // to a local buffer (for safety). - uint8_t out_block[STEP_SIZE]; - uint8_t * const guarded_dst{minifier.dst}; - minifier.dst = out_block; - minifier.step(block, reader); - size_t to_write = minifier.dst - out_block; - // In some cases, we could be enticed to consider the padded spaces - // as part of the string. This is fine as long as we do not write more - // than we consumed. - if(to_write > remaining_bytes) { to_write = remaining_bytes; } - memcpy(guarded_dst, out_block, to_write); - minifier.dst = guarded_dst + to_write; +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } } - return minifier.finish(dst, dst_len); } -} // namespace stage1 +} // namespace stringparsing } // unnamed namespace -} // namespace icelake +} // namespace westmere } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H -/* end file generic/stage1/json_minifier.h for icelake */ -/* including generic/stage1/json_structural_indexer.h for icelake: #include */ -/* begin file generic/stage1/json_structural_indexer.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for westmere */ +/* including generic/stage2/structural_iterator.h for westmere: #include */ +/* begin file generic/stage2/structural_iterator.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) - namespace simdjson { -namespace icelake { +namespace westmere { namespace { -namespace stage1 { +namespace stage2 { -class bit_indexer { +class structural_iterator { public: - uint32_t *tail; - - simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; -#if SIMDJSON_PREFER_REVERSE_BITS - /** - * ARM lacks a fast trailing zero instruction, but it has a fast - * bit reversal instruction and a fast leading zero instruction. - * Thus it may be profitable to reverse the bits (once) and then - * to rely on a sequence of instructions that call the leading - * zero instruction. - * - * Performance notes: - * The chosen routine is not optimal in terms of data dependency - * since zero_leading_bit might require two instructions. However, - * it tends to minimize the total number of instructions which is - * beneficial. - */ - simdjson_inline void write_index(uint32_t idx, uint64_t& rev_bits, int i) { - int lz = leading_zeroes(rev_bits); - this->tail[i] = static_cast(idx) + lz; - rev_bits = zero_leading_bit(rev_bits, lz); + // Start a structural + simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { } -#else - /** - * Under recent x64 systems, we often have both a fast trailing zero - * instruction and a fast 'clear-lower-bit' instruction so the following - * algorithm can be competitive. - */ - - simdjson_inline void write_index(uint32_t idx, uint64_t& bits, int i) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); + // Get the buffer position of the current structural character + simdjson_inline const uint8_t* current() { + return &buf[*(next_structural-1)]; } -#endif // SIMDJSON_PREFER_REVERSE_BITS - - template - simdjson_inline int write_indexes(uint32_t idx, uint64_t& bits) { - write_index(idx, bits, START); - SIMDJSON_IF_CONSTEXPR (N > 1) { - write_indexes<(N-1>0?START+1:START), (N-1>=0?N-1:1)>(idx, bits); - } - return START+N; + // Get the current structural character + simdjson_inline char current_char() { + return buf[*(next_structural-1)]; + } + // Get the next structural character without advancing + simdjson_inline char peek_next_char() { + return buf[*next_structural]; + } + simdjson_inline const uint8_t* peek() { + return &buf[*next_structural]; + } + simdjson_inline const uint8_t* advance() { + return &buf[*(next_structural++)]; + } + simdjson_inline char advance_char() { + return buf[*(next_structural++)]; + } + simdjson_inline size_t remaining_len() { + return dom_parser.len - *(next_structural-1); } - template - simdjson_inline int write_indexes_stepped(uint32_t idx, uint64_t& bits, int cnt) { - write_indexes(idx, bits); - SIMDJSON_IF_CONSTEXPR ((START+STEP) < END) { - if (simdjson_unlikely((START+STEP) < cnt)) { - write_indexes_stepped<(START+STEP(idx, bits, cnt); - } - } - return ((END-START) % STEP) == 0 ? END : (END-START) - ((END-START) % STEP) + STEP; + simdjson_inline bool at_end() { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; + } + simdjson_inline bool at_beginning() { + return next_structural == dom_parser.structural_indexes.get(); } +}; - // flatten out values in 'bits' assuming that they are are to have values of idx - // plus their position in the bitvector, and store these indexes at - // base_ptr[base] incrementing base as we go - // will potentially store extra values beyond end of valid bits, so base_ptr - // needs to be large enough to handle this - // - // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it - // will provide its own version of the code. -#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER - simdjson_inline void write(uint32_t idx, uint64_t bits); -#else - simdjson_inline void write(uint32_t idx, uint64_t bits) { - // In some instances, the next branch is expensive because it is mispredicted. - // Unfortunately, in other cases, - // it helps tremendously. - if (bits == 0) - return; +} // namespace stage2 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson - int cnt = static_cast(count_ones(bits)); +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +/* end file generic/stage2/structural_iterator.h for westmere */ +/* including generic/stage2/tape_builder.h for westmere: #include */ +/* begin file generic/stage2/tape_builder.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H -#if SIMDJSON_PREFER_REVERSE_BITS - bits = reverse_bits(bits); -#endif -#ifdef SIMDJSON_STRUCTURAL_INDEXER_STEP - static constexpr const int STEP = SIMDJSON_STRUCTURAL_INDEXER_STEP; -#else - static constexpr const int STEP = 4; -#endif - static constexpr const int STEP_UNTIL = 24; +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - write_indexes_stepped<0, STEP_UNTIL, STEP>(idx, bits, cnt); - SIMDJSON_IF_CONSTEXPR (STEP_UNTIL < 64) { - if (simdjson_unlikely(STEP_UNTIL < cnt)) { - for (int i=STEP_UNTIL; itail += cnt; - } -#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER +namespace simdjson { +namespace westmere { +namespace { +namespace stage2 { -}; +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; -class json_structural_indexer { -public: + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; /** - * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * Called when a key in a field is encountered. * - * @param partial Setting the partial parameter to true allows the find_structural_bits to - * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If - * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. */ - template - static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. + * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. + */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. */ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + /** Next location to write to tape */ + tape_writer tape; private: - simdjson_inline json_structural_indexer(uint32_t *structural_indexes); - template - simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; - simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; - json_scanner scanner{}; - utf8_checker checker{}; - bit_indexer indexer; - uint64_t prev_structurals = 0; - uint64_t unescaped_chars_error = 0; -}; + simdjson_inline tape_builder(dom::document &doc) noexcept; -simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder -// Skip the last character if it is partial -simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { - if (simdjson_unlikely(len < 3)) { - switch (len) { - case 2: - if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left - return len; - case 1: - if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - return len; - case 0: - return len; - } - } - if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left - return len; +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); } -// -// PERF NOTES: -// We pipe 2 inputs through these stages: -// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load -// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. -// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. -// The output of step 1 depends entirely on this information. These functions don't quite use -// up enough CPU: the second half of the functions is highly serial, only using 1 execution core -// at a time. The second input's scans has some dependency on the first ones finishing it, but -// they can make a lot of progress before they need that information. -// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that -// to finish: utf-8 checks and generating the output from the last iteration. -// -// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all -// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough -// workout. -// -template -error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { - if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } - // We guard the rest of the code so that we can assume that len > 0 throughout. - if (len == 0) { return EMPTY; } - if (is_streaming(partial)) { - len = trim_partial_utf8(buf, len); - // If you end up with an empty window after trimming - // the partial UTF-8 bytes, then chances are good that you - // have an UTF-8 formatting error. - if(len == 0) { return UTF8_ERROR; } - } - buf_block_reader reader(buf, len); - json_structural_indexer indexer(parser.structural_indexes.get()); - - // Read all but the last block - while (reader.has_full_block()) { - indexer.step(reader.full_block(), reader); - } - // Take care of the last block (will always be there unless file is empty which is - // not supposed to happen.) - uint8_t block[STEP_SIZE]; - if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } - indexer.step(block, reader); - return indexer.finish(parser, reader.block_index(), len, partial); +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } -template<> -simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block); - simd::simd8x64 in_2(block+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1, reader.block_index()); - this->next(in_2, block_2, reader.block_index()+64); - reader.advance(); +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; } -template<> -simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block); - json_block block_1 = scanner.next(in_1); - this->next(in_1, block_1, reader.block_index()); - reader.advance(); +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); } -simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { - uint64_t unescaped = in.lteq(0x1F); -#if SIMDJSON_UTF8VALIDATION - checker.check_next_input(in); -#endif - indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser - prev_structurals = block.structural_start(); - unescaped_chars_error |= block.non_quote_inside_string(unescaped); +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; } -simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { - // Write out the final iteration's structurals - indexer.write(uint32_t(idx-64), prev_structurals); - error_code error = scanner.finish(); - // We deliberately break down the next expression so that it is - // human readable. - const bool should_we_exit = is_streaming(partial) ? - ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING - : (error != SUCCESS); // if partial is false, we must have SUCCESS - const bool have_unclosed_string = (error == UNCLOSED_STRING); - if (simdjson_unlikely(should_we_exit)) { return error; } +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} - if (unescaped_chars_error) { - return UNESCAPED_CHARS; - } - parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); - /*** - * The On Demand API requires special padding. - * - * This is related to https://github.com/simdjson/simdjson/issues/906 - * Basically, we want to make sure that if the parsing continues beyond the last (valid) - * structural character, it quickly stops. - * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. - * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing - * continues, then it must be [,] or }. - * Suppose it is ] or }. We backtrack to the first character, what could it be that would - * not trigger an error? It could be ] or } but no, because you can't start a document that way. - * It can't be a comma, a colon or any simple value. So the only way we could continue is - * if the repeated character is [. But if so, the document must start with [. But if the document - * starts with [, it should end with ]. If we enforce that rule, then we would get - * ][[ which is invalid. - * - * This is illustrated with the test array_iterate_unclosed_error() on the following input: - * R"({ "a": [,,)" - **/ - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final - parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); - parser.structural_indexes[parser.n_structural_indexes + 2] = 0; - parser.next_structural_index = 0; - // a valid JSON file cannot have zero structural indexes - we should have found something - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { - return EMPTY; - } - if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { - return UNEXPECTED_ERROR; +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; } - if (partial == stage1_mode::streaming_partial) { - // If we have an unclosed string, then the last structural - // will be the quote and we want to make sure to omit it. - if(have_unclosed_string) { - parser.n_structural_indexes--; - // a valid JSON file cannot have zero structural indexes - we should have found something - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } - } - // We truncate the input to the end of the last complete document (or zero). - auto new_structural_indexes = find_next_document_index(parser); - if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { - if(parser.structural_indexes[0] == 0) { - // If the buffer is partial and we started at index 0 but the document is - // incomplete, it's too big to parse. - return CAPACITY; - } else { - // It is possible that the document could be parsed, we just had a lot - // of white space. - parser.n_structural_indexes = 0; - return EMPTY; - } - } + on_end_string(dst); + return SUCCESS; +} - parser.n_structural_indexes = new_structural_indexes; - } else if (partial == stage1_mode::streaming_final) { - if(have_unclosed_string) { parser.n_structural_indexes--; } - // We truncate the input to the end of the last complete document (or zero). - // Because partial == stage1_mode::streaming_final, it means that we may - // silently ignore trailing garbage. Though it sounds bad, we do it - // deliberately because many people who have streams of JSON documents - // will truncate them for processing. E.g., imagine that you are uncompressing - // the data from a size file or receiving it in chunks from the network. You - // may not know where exactly the last document will be. Meanwhile the - // document_stream instances allow people to know the JSON documents they are - // parsing (see the iterator.source() method). - parser.n_structural_indexes = find_next_document_index(parser); - // We store the initial n_structural_indexes so that the client can see - // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, - // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, - // otherwise, it will copy some prior index. - parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; - // This next line is critical, do not change it unless you understand what you are - // doing. - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { - // We tolerate an unclosed string at the very end of the stream. Indeed, users - // often load their data in bulk without being careful and they want us to ignore - // the trailing garbage. - return EMPTY; - } - } - checker.check_eof(); - return checker.errors(); +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); } -} // namespace stage1 -} // unnamed namespace -} // namespace icelake -} // namespace simdjson +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} -// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. -#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} -#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H -/* end file generic/stage1/json_structural_indexer.h for icelake */ -/* including generic/stage1/utf8_validator.h for icelake: #include */ -/* begin file generic/stage1/utf8_validator.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} -namespace simdjson { -namespace icelake { -namespace { -namespace stage1 { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} -/** - * Validates that the string is actual UTF-8. - */ -template -bool generic_validate_utf8(const uint8_t * input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - return c.errors() == error_code::SUCCESS; +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; } -bool generic_validate_utf8(const char * input, size_t length) { - return generic_validate_utf8(reinterpret_cast(input),length); +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; } -} // namespace stage1 -} // unnamed namespace -} // namespace icelake -} // namespace simdjson +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} -#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H -/* end file generic/stage1/utf8_validator.h for icelake */ -/* end file generic/stage1/amalgamated.h for icelake */ -/* including generic/stage2/amalgamated.h for icelake: #include */ -/* begin file generic/stage2/amalgamated.h for icelake */ -// Stuff other things depend on -/* including generic/stage2/base.h for icelake: #include */ -/* begin file generic/stage2/base.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +// private: -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} -namespace simdjson { -namespace icelake { -namespace { -namespace stage2 { +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} -class json_iterator; -class structural_iterator; -struct tape_builder; -struct tape_writer; +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} } // namespace stage2 } // unnamed namespace -} // namespace icelake +} // namespace westmere } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H -/* end file generic/stage2/base.h for icelake */ -/* including generic/stage2/tape_writer.h for icelake: #include */ -/* begin file generic/stage2/tape_writer.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +/* end file generic/stage2/tape_builder.h for westmere */ +/* end file generic/stage2/amalgamated.h for westmere */ -#include +// +// Stage 1 +// namespace simdjson { -namespace icelake { +namespace westmere { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + namespace { -namespace stage2 { -struct tape_writer { - /** The next place to write to tape */ - uint64_t *next_tape_loc; +using namespace simd; - /** Write a signed 64-bit value to tape. */ - simdjson_inline void append_s64(int64_t value) noexcept; +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why + // we can't use the generic lookup_16. + auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); + + // The 6 operators (:,[]{}) have these values: + // + // , 2C + // : 3A + // [ 5B + // { 7B + // ] 5D + // } 7D + // + // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. + // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then + // match it (against | 0x20). + // + // To prevent recognizing other characters, everything else gets compared with 0, which cannot + // match due to the | 0x20. + // + // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , + // and :. This gets caught in stage 2, which checks the actual character to ensure the right + // operators are in the right places. + const auto op_table = simd8::repeat_16( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B + ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D + ); - /** Write an unsigned 64-bit value to tape. */ - simdjson_inline void append_u64(uint64_t value) noexcept; + // We compute whitespace and op separately. If the code later only use one or the + // other, given the fact that all functions are aggressively inlined, we can + // hope that useless computations will be omitted. This is namely case when + // minifying (we only need whitespace). - /** Write a double value to tape. */ - simdjson_inline void append_double(double value) noexcept; - /** - * Append a tape entry (an 8-bit type,and 56 bits worth of value). - */ - simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + const uint64_t whitespace = in.eq({ + _mm_shuffle_epi8(whitespace_table, in.chunks[0]), + _mm_shuffle_epi8(whitespace_table, in.chunks[1]), + _mm_shuffle_epi8(whitespace_table, in.chunks[2]), + _mm_shuffle_epi8(whitespace_table, in.chunks[3]) + }); + // Turn [ and ] into { and } + const simd8x64 curlified{ + in.chunks[0] | 0x20, + in.chunks[1] | 0x20, + in.chunks[2] | 0x20, + in.chunks[3] | 0x20 + }; + const uint64_t op = curlified.eq({ + _mm_shuffle_epi8(op_table, in.chunks[0]), + _mm_shuffle_epi8(op_table, in.chunks[1]), + _mm_shuffle_epi8(op_table, in.chunks[2]), + _mm_shuffle_epi8(op_table, in.chunks[3]) + }); + return { whitespace, op }; +} - /** - * Skip the current tape entry without writing. - * - * Used to skip the start of the container, since we'll come back later to fill it in when the - * container ends. - */ - simdjson_inline void skip() noexcept; +simdjson_inline bool is_ascii(const simd8x64& input) { + return input.reduce_or().is_ascii(); +} - /** - * Skip the number of tape entries necessary to write a large u64 or i64. - */ - simdjson_inline void skip_large_integer() noexcept; +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. + return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} - /** - * Skip the number of tape entries necessary to write a double. - */ - simdjson_inline void skip_double() noexcept; +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80 + return is_third_byte | is_fourth_byte; +} - /** - * Write a value to a known location on tape. - * - * Used to go back and write out the start of a container after the container ends. - */ - simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; +} // unnamed namespace +} // namespace westmere +} // namespace simdjson -private: - /** - * Append both the tape entry, and a supplementary value following it. Used for types that need - * all 64 bits, such as double and uint64_t. - */ - template - simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; -}; // struct tape_writer +// +// Stage 2 +// -simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { - append2(0, value, internal::tape_type::INT64); -} +// +// Implementation-specific overrides +// -simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { - append(0, internal::tape_type::UINT64); - *next_tape_loc = value; - next_tape_loc++; +namespace simdjson { +namespace westmere { + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return westmere::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); } -/** Write a double value to tape. */ -simdjson_inline void tape_writer::append_double(double value) noexcept { - append2(0, value, internal::tape_type::DOUBLE); +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return westmere::stage1::json_structural_indexer::index<64>(_buf, _len, *this, streaming); } -simdjson_inline void tape_writer::skip() noexcept { - next_tape_loc++; +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return westmere::stage1::generic_validate_utf8(buf,len); } -simdjson_inline void tape_writer::skip_large_integer() noexcept { - next_tape_loc += 2; +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); } -simdjson_inline void tape_writer::skip_double() noexcept { - next_tape_loc += 2; +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); } -simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { - *next_tape_loc = val | ((uint64_t(char(t))) << 56); - next_tape_loc++; +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { + return westmere::stringparsing::parse_string(src, dst, replacement_char); } -template -simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { - append(val, t); - static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); - memcpy(next_tape_loc, &val2, sizeof(val2)); - next_tape_loc++; +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return westmere::stringparsing::parse_wobbly_string(src, dst); } -simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { - tape_loc = val | ((uint64_t(char(t))) << 56); +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); } -} // namespace stage2 -} // unnamed namespace -} // namespace icelake +} // namespace westmere } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H -/* end file generic/stage2/tape_writer.h for icelake */ -/* including generic/stage2/logger.h for icelake: #include */ -/* begin file generic/stage2/logger.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H - +/* including simdjson/westmere/end.h: #include */ +/* begin file simdjson/westmere/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include - - -// This is for an internal-only stage 2 specific logger. -// Set LOG_ENABLED = true to log what stage 2 is doing! -namespace simdjson { -namespace icelake { -namespace { -namespace logger { +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_UNTARGET_REGION +#endif - static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +/* undefining SIMDJSON_IMPLEMENTATION from "westmere" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/westmere/end.h */ -#if SIMDJSON_VERBOSE_LOGGING - static constexpr const bool LOG_ENABLED = true; -#else - static constexpr const bool LOG_ENABLED = false; +#endif // SIMDJSON_SRC_WESTMERE_CPP +/* end file westmere.cpp */ #endif - static constexpr const int LOG_EVENT_LEN = 20; - static constexpr const int LOG_BUFFER_LEN = 30; - static constexpr const int LOG_SMALL_BUFFER_LEN = 10; - static constexpr const int LOG_INDEX_LEN = 5; +#if SIMDJSON_IMPLEMENTATION_LSX +/* including lsx.cpp: #include */ +/* begin file lsx.cpp */ +#ifndef SIMDJSON_SRC_LSX_CPP +#define SIMDJSON_SRC_LSX_CPP - static int log_depth; // Not threadsafe. Log only. +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // Helper to turn unprintable or newline characters into spaces - static simdjson_inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } - } +/* including simdjson/lsx.h: #include */ +/* begin file simdjson/lsx.h */ +#ifndef SIMDJSON_LSX_H +#define SIMDJSON_LSX_H - // Print the header and set up log_start - static simdjson_inline void log_start() { - if (LOG_ENABLED) { - log_depth = 0; - printf("\n"); - printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); - printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); - } - } +/* including simdjson/lsx/begin.h: #include "simdjson/lsx/begin.h" */ +/* begin file simdjson/lsx/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "lsx" */ +#define SIMDJSON_IMPLEMENTATION lsx +/* including simdjson/lsx/base.h: #include "simdjson/lsx/base.h" */ +/* begin file simdjson/lsx/base.h */ +#ifndef SIMDJSON_LSX_BASE_H +#define SIMDJSON_LSX_BASE_H - simdjson_unused static simdjson_inline void log_string(const char *message) { - if (LOG_ENABLED) { - printf("%s\n", message); - } - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // Logs a single line from the stage 2 DOM parser - template - static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { - if (LOG_ENABLED) { - printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); - auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; - auto next_index = structurals.next_structural; - auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); - auto next = &structurals.buf[*next_index]; - { - // Print the next N characters in the buffer. - printf("| "); - // Otherwise, print the characters starting from the buffer position. - // Print spaces for unprintable or newline characters. - for (int i=0;i struct simd8; +template struct simd8x64; +} // namespace simd } // unnamed namespace -} // namespace icelake + +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H -/* end file generic/stage2/logger.h for icelake */ +#endif // SIMDJSON_LSX_BASE_H +/* end file simdjson/lsx/base.h */ +/* including simdjson/lsx/intrinsics.h: #include "simdjson/lsx/intrinsics.h" */ +/* begin file simdjson/lsx/intrinsics.h */ +#ifndef SIMDJSON_LSX_INTRINSICS_H +#define SIMDJSON_LSX_INTRINSICS_H -// All other declarations -/* including generic/stage2/json_iterator.h for icelake: #include */ -/* begin file generic/stage2/json_iterator.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for LoongArch SX"); + +#endif // SIMDJSON_LSX_INTRINSICS_H +/* end file simdjson/lsx/intrinsics.h */ +/* including simdjson/lsx/bitmanipulation.h: #include "simdjson/lsx/bitmanipulation.h" */ +/* begin file simdjson/lsx/bitmanipulation.h */ +#ifndef SIMDJSON_LSX_BITMANIPULATION_H +#define SIMDJSON_LSX_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmask.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace lsx { namespace { -namespace stage2 { - -class json_iterator { -public: - const uint8_t* const buf; - uint32_t *next_structural; - dom_parser_implementation &dom_parser; - uint32_t depth{0}; - - /** - * Walk the JSON document. - * - * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as - * the first parameter; some callbacks have other parameters as well: - * - * - visit_document_start() - at the beginning. - * - visit_document_end() - at the end (if things were successful). - * - * - visit_array_start() - at the start `[` of a non-empty array. - * - visit_array_end() - at the end `]` of a non-empty array. - * - visit_empty_array() - when an empty array is encountered. - * - * - visit_object_end() - at the start `]` of a non-empty object. - * - visit_object_start() - at the end `]` of a non-empty object. - * - visit_empty_object() - when an empty object is encountered. - * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is - * guaranteed to point at the first quote of the string (`"key"`). - * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. - * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. - * - * - increment_count(iter) - each time a value is found in an array or object. - */ - template - simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; - /** - * Create an iterator capable of walking a JSON document. - * - * The document must have already passed through stage 1. - */ - simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} - /** - * Look at the next token. - * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_inline const uint8_t *peek() const noexcept; - /** - * Advance to the next token. - * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_inline const uint8_t *advance() noexcept; - /** - * Get the remaining length of the document, from the start of the current token. - */ - simdjson_inline size_t remaining_len() const noexcept; - /** - * Check if we are at the end of the document. - * - * If this is true, there are no more tokens. - */ - simdjson_inline bool at_eof() const noexcept; - /** - * Check if we are at the beginning of the document. - */ - simdjson_inline bool at_beginning() const noexcept; - simdjson_inline uint8_t last_structural() const noexcept; +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} - /** - * Log that a value has been found. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_inline void log_value(const char *type) const noexcept; - /** - * Log the start of a multipart value. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_inline void log_start_value(const char *type) const noexcept; - /** - * Log the end of a multipart value. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_inline void log_end_value(const char *type) const noexcept; - /** - * Log an error. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_inline void log_error(const char *error) const noexcept; +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return __builtin_clzll(input_num); +} - template - simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; - template - simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; -}; +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__m128i(v2u64{input_num, 0})), 0); +} -template -simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { - logger::log_start(); +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +} - // - // Start the document - // - if (at_eof()) { return EMPTY; } - log_start_value("document"); - SIMDJSON_TRY( visitor.visit_document_start(*this) ); +} // unnamed namespace +} // namespace lsx +} // namespace simdjson - // - // Read first value - // - { - auto value = advance(); +#endif // SIMDJSON_LSX_BITMANIPULATION_H +/* end file simdjson/lsx/bitmanipulation.h */ +/* including simdjson/lsx/bitmask.h: #include "simdjson/lsx/bitmask.h" */ +/* begin file simdjson/lsx/bitmask.h */ +#ifndef SIMDJSON_LSX_BITMASK_H +#define SIMDJSON_LSX_BITMASK_H - // Make sure the outer object or array is closed before continuing; otherwise, there are ways we - // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 - if (!STREAMING) { - switch (*value) { - case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; - case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; - } - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; - } - } - goto document_end; +namespace simdjson { +namespace lsx { +namespace { // -// Object parser states +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. // -object_begin: - log_start_value("object"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = false; - SIMDJSON_TRY( visitor.visit_object_start(*this) ); +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} - { - auto key = advance(); - if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } - SIMDJSON_TRY( visitor.increment_count(*this) ); - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } +} // unnamed namespace +} // namespace lsx +} // namespace simdjson -object_field: - if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } +#endif +/* end file simdjson/lsx/bitmask.h */ +/* including simdjson/lsx/numberparsing_defs.h: #include "simdjson/lsx/numberparsing_defs.h" */ +/* begin file simdjson/lsx/numberparsing_defs.h */ +#ifndef SIMDJSON_LSX_NUMBERPARSING_DEFS_H +#define SIMDJSON_LSX_NUMBERPARSING_DEFS_H -object_continue: - switch (*advance()) { - case ',': - SIMDJSON_TRY( visitor.increment_count(*this) ); - { - auto key = advance(); - if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - goto object_field; - case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; - default: log_error("No comma between object fields"); return TAPE_ERROR; - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lsx { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); + return answer; +} -scope_end: - depth--; - if (depth == 0) { goto document_end; } - if (dom_parser.is_array[depth]) { goto array_continue; } - goto object_continue; +} // namespace numberparsing +} // namespace lsx +} // namespace simdjson -// -// Array parser states -// -array_begin: - log_start_value("array"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = true; - SIMDJSON_TRY( visitor.visit_array_start(*this) ); - SIMDJSON_TRY( visitor.increment_count(*this) ); +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 +#else +#define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif +#endif -array_value: - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } +#endif // SIMDJSON_LSX_NUMBERPARSING_DEFS_H +/* end file simdjson/lsx/numberparsing_defs.h */ +/* including simdjson/lsx/simd.h: #include "simdjson/lsx/simd.h" */ +/* begin file simdjson/lsx/simd.h */ +#ifndef SIMDJSON_LSX_SIMD_H +#define SIMDJSON_LSX_SIMD_H -array_continue: - switch (*advance()) { - case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; - case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; - default: log_error("Missing comma between array values"); return TAPE_ERROR; - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -document_end: - log_end_value("document"); - SIMDJSON_TRY( visitor.visit_document_end(*this) ); +namespace simdjson { +namespace lsx { +namespace { +namespace simd { - dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m128i value; - // If we didn't make it to the end, it's an error - if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { - log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); - return TAPE_ERROR; - } + // Zero constructor + simdjson_inline base() : value{__m128i()} {} - return SUCCESS; + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} -} // walk_document() + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + simdjson_inline operator const v16i8&() const { return (v16i8&)this->value; } + simdjson_inline operator v16i8&() { return (v16i8&)this->value; } -simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) - : buf{_dom_parser.buf}, - next_structural{&_dom_parser.structural_indexes[start_structural_index]}, - dom_parser{_dom_parser} { -} + // Bit operations + simdjson_inline Child operator|(const Child other) const { return __lsx_vor_v(*this, other); } + simdjson_inline Child operator&(const Child other) const { return __lsx_vand_v(*this, other); } + simdjson_inline Child operator^(const Child other) const { return __lsx_vxor_v(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return __lsx_vandn_v(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; -simdjson_inline const uint8_t *json_iterator::peek() const noexcept { - return &buf[*(next_structural)]; -} -simdjson_inline const uint8_t *json_iterator::advance() noexcept { - return &buf[*(next_structural++)]; -} -simdjson_inline size_t json_iterator::remaining_len() const noexcept { - return dom_parser.len - *(next_structural-1); -} + // Forward-declared so they can be used by splat and friends. + template + struct simd8; -simdjson_inline bool json_iterator::at_eof() const noexcept { - return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; -} -simdjson_inline bool json_iterator::at_beginning() const noexcept { - return next_structural == dom_parser.structural_indexes.get(); -} -simdjson_inline uint8_t json_iterator::last_structural() const noexcept { - return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; -} + template> + struct base8: base> { + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} -simdjson_inline void json_iterator::log_value(const char *type) const noexcept { - logger::log_line(*this, "", type, ""); -} + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return __lsx_vseq_b(lhs, rhs); } -simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { - logger::log_line(*this, "+", type, ""); - if (logger::LOG_ENABLED) { logger::log_depth++; } -} + static const int SIZE = sizeof(base>::value); -simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { - if (logger::LOG_ENABLED) { logger::log_depth--; } - logger::log_line(*this, "-", type, ""); -} + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(*this, N), __lsx_vbsrl_v(prev_chunk, 16 - N)); + } + }; -simdjson_inline void json_iterator::log_error(const char *error) const noexcept { - logger::log_line(*this, "", "ERROR", error); -} + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { + return __lsx_vreplgr2vr_b(uint8_t(-(!!_value))); + } -template -simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { - switch (*value) { - case '"': return visitor.visit_root_string(*this, value); - case 't': return visitor.visit_root_true_atom(*this, value); - case 'f': return visitor.visit_root_false_atom(*this, value); - case 'n': return visitor.visit_root_null_atom(*this, value); - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return visitor.visit_root_number(*this, value); - default: - log_error("Document starts with a non-value character"); - return TAPE_ERROR; - } -} -template -simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { - // Use the fact that most scalars are going to be either strings or numbers. - if(*value == '"') { - return visitor.visit_string(*this, value); - } else if (((*value - '0') < 10) || (*value == '-')) { - return visitor.visit_number(*this, value); - } - // true, false, null are uncommon. - switch (*value) { - case 't': return visitor.visit_true_atom(*this, value); - case 'f': return visitor.visit_false_atom(*this, value); - case 'n': return visitor.visit_null_atom(*this, value); - default: - log_error("Non-value found when value was expected!"); - return TAPE_ERROR; - } -} + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} -} // namespace stage2 -} // unnamed namespace -} // namespace icelake -} // namespace simdjson + simdjson_inline int to_bitmask() const { return __lsx_vpickve2gr_w(__lsx_vmskltz_b(*this), 0); } + simdjson_inline bool any() const { return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; -#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H -/* end file generic/stage2/json_iterator.h for icelake */ -/* including generic/stage2/stringparsing.h for icelake: #include */ -/* begin file generic/stage2/stringparsing.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return __lsx_vreplgr2vr_b(_value); } + static simdjson_inline simd8 zero() { return __lsx_vldi(0); } + static simdjson_inline simd8 load(const T values[16]) { + return __lsx_vld(reinterpret_cast(values), 0); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} -// This file contains the common code every implementation uses -// It is intended to be included multiple times and compiled multiple times + // Store to array + simdjson_inline void store(T dst[16]) const { + return __lsx_vst(*this, reinterpret_cast<__m128i *>(dst), 0); + } -namespace simdjson { -namespace icelake { -namespace { -/// @private -namespace stringparsing { + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return __lsx_vadd_b(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return __lsx_vsub_b(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return __lsx_vshuf_b(lookup_table, lookup_table, *this); + } - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by haswell + // lsx do it in 2 steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register. + __m128i shufmask = {int64_t(thintable_epi8[mask1]), int64_t(thintable_epi8[mask2]) + 0x0808080808080808}; + // this is the version "nearly pruned" + __m128i pruned = __lsx_vshuf_b(*this, *this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask + __m128i compactmask = __lsx_vldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop1 * 8); + __m128i answer = __lsx_vshuf_b(pruned, pruned, compactmask); + __lsx_vst(answer, reinterpret_cast(output), 0); + } - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr, bool allow_replacement) { - // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) - constexpr uint32_t substitution_code_point = 0xfffd; - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[16]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8({ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } - // If we found a high surrogate, we must - // check for low surrogate for characters - // outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - const uint8_t *src_data = *src_ptr; - /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ - if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { - if(!allow_replacement) { return false; } - code_point = substitution_code_point; - } else { - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return __lsx_vmax_b(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lsx_vmin_b(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return __lsx_vslt_b(other, *this); } + simdjson_inline simd8 operator<(const simd8 other) const { return __lsx_vslt_b(*this, other); } + }; - // We have already checked that the high surrogate is valid and - // (code_point - 0xd800) < 1024. - // - // Check that code_point_2 is in the range 0xdc00..0xdfff - // and that code_point_2 was parsed from valid hex. - uint32_t low_bit = code_point_2 - 0xdc00; - if (low_bit >> 10) { - if(!allow_replacement) { return false; } - code_point = substitution_code_point; - } else { - code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; - *src_ptr += 6; - } + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(__m128i(v16u8{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + })) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return __lsx_vsadd_bu(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return __lsx_vssub_bu(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return __lsx_vmax_bu(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lsx_vmin_bu(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return 0 == __lsx_vpickve2gr_w(__lsx_vmskltz_b(*this), 0); } + simdjson_inline bool bits_not_set_anywhere() const { return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(__lsx_vand_v(*this, bits)), 0); } - } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { - // If we encounter a low surrogate (not preceded by a high surrogate) - // then we have an error. - if(!allow_replacement) { return false; } - code_point = substitution_code_point; - } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(__lsx_vsrli_b(*this, N)); } + template + simdjson_inline simd8 shl() const { return simd8(__lsx_vslli_b(*this, N)); } + }; + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "LSX kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; -// handle a unicode codepoint using the wobbly convention -// https://simonsapin.github.io/wtf-8/ -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // It is not ideal that this function is nearly identical to handle_unicode_codepoint. - // - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // If we found a high surrogate, we must - // check for low surrogate for characters - // outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - const uint8_t *src_data = *src_ptr; - /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ - if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); - uint32_t low_bit = code_point_2 - 0xdc00; - if ((low_bit >> 10) == 0) { - code_point = - (((code_point - 0xd800) << 10) | low_bit) + 0x10000; - *src_ptr += 6; - } + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint16_t mask1 = uint16_t(mask); + uint16_t mask2 = uint16_t(mask >> 16); + uint16_t mask3 = uint16_t(mask >> 32); + uint16_t mask4 = uint16_t(mask >> 48); + __m128i zcnt = __lsx_vpcnt_h(__m128i(v2u64{~mask, 0})); + uint64_t zcnt1 = __lsx_vpickve2gr_hu(zcnt, 0); + uint64_t zcnt2 = __lsx_vpickve2gr_hu(zcnt, 1); + uint64_t zcnt3 = __lsx_vpickve2gr_hu(zcnt, 2); + uint64_t zcnt4 = __lsx_vpickve2gr_hu(zcnt, 3); + uint8_t *voutput = reinterpret_cast(output); + // There should be a critical value which processes in scaler is faster. + if (zcnt1) + this->chunks[0].compress(mask1, reinterpret_cast(voutput)); + voutput += zcnt1; + if (zcnt2) + this->chunks[1].compress(mask2, reinterpret_cast(voutput)); + voutput += zcnt2; + if (zcnt3) + this->chunks[2].compress(mask3, reinterpret_cast(voutput)); + voutput += zcnt3; + if (zcnt4) + this->chunks[3].compress(mask4, reinterpret_cast(voutput)); + voutput += zcnt4; + return reinterpret_cast(voutput) - reinterpret_cast(output); } - } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + simdjson_inline uint64_t to_bitmask() const { + __m128i mask1 = __lsx_vmskltz_b(this->chunks[0]); + __m128i mask2 = __lsx_vmskltz_b(this->chunks[1]); + __m128i mask3 = __lsx_vmskltz_b(this->chunks[2]); + __m128i mask4 = __lsx_vmskltz_b(this->chunks[3]); + mask1 = __lsx_vilvl_h(mask2, mask1); + mask2 = __lsx_vilvl_h(mask4, mask3); + return __lsx_vpickve2gr_du(__lsx_vilvl_w(mask2, mask1), 0); + } -/** - * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There - * must be an unescaped quote terminating the string. It returns the final output - * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large - * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + - * SIMDJSON_PADDING bytes. - */ -simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. */ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); } - } -} -simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { - // It is not ideal that this function is nearly identical to parse_string. - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint_wobbly(&src, &dst)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. */ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); } - } -} + }; // struct simd8x64 -} // namespace stringparsing +} // namespace simd } // unnamed namespace -} // namespace icelake +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H -/* end file generic/stage2/stringparsing.h for icelake */ -/* including generic/stage2/structural_iterator.h for icelake: #include */ -/* begin file generic/stage2/structural_iterator.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +#endif // SIMDJSON_LSX_SIMD_H +/* end file simdjson/lsx/simd.h */ +/* including simdjson/lsx/stringparsing_defs.h: #include "simdjson/lsx/stringparsing_defs.h" */ +/* begin file simdjson/lsx/stringparsing_defs.h */ +#ifndef SIMDJSON_LSX_STRINGPARSING_DEFS_H +#define SIMDJSON_LSX_STRINGPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace lsx { namespace { -namespace stage2 { -class structural_iterator { +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { public: - const uint8_t* const buf; - uint32_t *next_structural; - dom_parser_implementation &dom_parser; + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - // Start a structural - simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) - : buf{_dom_parser.buf}, - next_structural{&_dom_parser.structural_indexes[start_structural_index]}, - dom_parser{_dom_parser} { - } - // Get the buffer position of the current structural character - simdjson_inline const uint8_t* current() { - return &buf[*(next_structural-1)]; - } - // Get the current structural character - simdjson_inline char current_char() { - return buf[*(next_structural-1)]; - } - // Get the next structural character without advancing - simdjson_inline char peek_next_char() { - return buf[*next_structural]; - } - simdjson_inline const uint8_t* peek() { - return &buf[*next_structural]; - } - simdjson_inline const uint8_t* advance() { - return &buf[*(next_structural++)]; - } - simdjson_inline char advance_char() { - return buf[*(next_structural++)]; - } - simdjson_inline size_t remaining_len() { - return dom_parser.len - *(next_structural-1); - } + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } - simdjson_inline bool at_end() { - return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; - } - simdjson_inline bool at_beginning() { - return next_structural == dom_parser.structural_indexes.get(); - } -}; + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on LSX; therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} -} // namespace stage2 } // unnamed namespace -} // namespace icelake +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H -/* end file generic/stage2/structural_iterator.h for icelake */ -/* including generic/stage2/tape_builder.h for icelake: #include */ -/* begin file generic/stage2/tape_builder.h for icelake */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +#endif // SIMDJSON_LSX_STRINGPARSING_DEFS_H +/* end file simdjson/lsx/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/lsx/begin.h */ +/* including simdjson/generic/amalgamated.h for lsx: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for lsx */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for lsx: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for lsx */ +#ifndef SIMDJSON_GENERIC_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +namespace simdjson { +namespace lsx { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for lsx */ +/* including simdjson/generic/jsoncharutils.h for lsx: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for lsx */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace lsx { namespace { -namespace stage2 { +namespace jsoncharutils { -struct tape_builder { - template - simdjson_warn_unused static simdjson_inline error_code parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept; +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} - /** Called when a non-empty document starts. */ - simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; - /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} - /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; - /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; - /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} - /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; - /** - * Called when a key in a field is encountered. - * - * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array - * will be called after this with the field value. - */ - simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; - /** Called when a non-empty object ends. */ - simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; - /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} - /** - * Called when a string, number, boolean or null is found. - */ - simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; - /** - * Called when a string, number, boolean or null is found at the top level of a document (i.e. - * when there is no array or object and the entire document is a single string, number, boolean or - * null. - * - * This is separate from primitive() because simdjson's normal primitive parsing routines assume - * there is at least one more token after the value, which is only true in an array or object. - */ - simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif - simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; +} // namespace jsoncharutils +} // unnamed namespace +} // namespace lsx +} // namespace simdjson - simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for lsx */ +/* including simdjson/generic/atomparsing.h for lsx: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for lsx */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H - /** Called each time a new field or element in an array or object is found. */ - simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - /** Next location to write to tape */ - tape_writer tape; -private: - /** Next write location in the string buf for stage 2 parsing */ - uint8_t *current_string_buf_loc; +#include - simdjson_inline tape_builder(dom::document &doc) noexcept; +namespace simdjson { +namespace lsx { +namespace { +/// @private +namespace atomparsing { - simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; - simdjson_inline void start_container(json_iterator &iter) noexcept; - simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; - simdjson_inline void on_end_string(uint8_t *dst) noexcept; -}; // struct tape_builder +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } -template -simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept { - dom_parser.doc = &doc; - json_iterator iter(dom_parser, STREAMING ? dom_parser.next_structural_index : 0); - tape_builder builder(doc); - return iter.walk_document(builder); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_root_primitive(*this, value); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_primitive(*this, value); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { - constexpr uint32_t start_tape_index = 0; - tape.append(start_tape_index, internal::tape_type::ROOT); - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); - return SUCCESS; + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { - return visit_string(iter, key, true); + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } } -simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 +} // namespace atomparsing +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for lsx */ +/* including simdjson/generic/dom_parser_implementation.h for lsx: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for lsx */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace lsx +} // namespace simdjson + +namespace simdjson { +namespace lsx { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; return SUCCESS; } -simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { - iter.log_value(key ? "key" : "string"); - uint8_t *dst = on_start_string(iter); - dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. - if (dst == nullptr) { - iter.log_error("Invalid escape in string"); - return STRING_ERROR; - } - on_end_string(dst); + _max_depth = max_depth; return SUCCESS; } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { - return visit_string(iter, value); -} +} // namespace lsx +} // namespace simdjson -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("number"); - return numberparsing::parse_number(value, tape); -} +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for lsx */ +/* including simdjson/generic/implementation_simdjson_result_base.h for lsx: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for lsx */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { - // - // We need to make a copy to make sure that the string is space terminated. - // This is not about padding the input, which should already padded up - // to len + SIMDJSON_PADDING. However, we have no control at this stage - // on how the padding was done. What if the input string was padded with nulls? - // It is quite common for an input string to have an extra null character (C string). - // We do not want to allow 9\0 (where \0 is the null character) inside a JSON - // document, but the string "9\0" by itself is fine. So we make a copy and - // pad the input with spaces when we know that there is just one input element. - // This copy is relatively expensive, but it will almost never be called in - // practice unless you are in the strange scenario where you have many JSON - // documents made of single atoms. - // - std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); - if (copy.get() == nullptr) { return MEMALLOC; } - std::memcpy(copy.get(), value, iter.remaining_len()); - std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); - error_code error = visit_number(iter, copy.get()); - return error; -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} +namespace simdjson { +namespace lsx { -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } - tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } - tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; -// private: + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; -simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { - return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); -} + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; -simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - auto start_index = next_tape_index(iter); - tape.append(start_index+2, start); - tape.append(start_index, end); - return SUCCESS; -} + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; -simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); - iter.dom_parser.open_containers[iter.depth].count = 0; - tape.skip(); // We don't actually *write* the start element until the end. -} +#if SIMDJSON_EXCEPTIONS -simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - // Write the ending tape element, pointing at the start location - const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; - tape.append(start_tape_index, end); - // Write the start tape element, pointing at the end location (and including count) - // count can overflow if it exceeds 24 bits... so we saturate - // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). - const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; - const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); - return SUCCESS; -} + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); -simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { - // we advance the point, accounting for the fact that we have a NULL termination - tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); - return current_string_buf_loc + sizeof(uint32_t); -} + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); -simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { - uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); - // TODO check for overflow in case someone has a crazy string (>=4GB?) - // But only add the overflow check when the document itself exceeds 4GB - // Currently unneeded because we refuse to parse docs larger or equal to 4GB. - memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); - // NULL termination is still handy if you expect all your strings to - // be NULL terminated? It comes at a small cost - *dst = 0; - current_string_buf_loc = dst + 1; -} + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); -} // namespace stage2 -} // unnamed namespace -} // namespace icelake + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H -/* end file generic/stage2/tape_builder.h for icelake */ -/* end file generic/stage2/amalgamated.h for icelake */ +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for lsx */ +/* including simdjson/generic/numberparsing.h for lsx: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for lsx */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H -#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// -// Stage 1 -// +#include +#include +#include namespace simdjson { -namespace icelake { +namespace lsx { +namespace numberparsing { -simdjson_warn_unused error_code implementation::create_dom_parser_implementation( - size_t capacity, - size_t max_depth, - std::unique_ptr& dst -) const noexcept { - dst.reset( new (std::nothrow) dom_parser_implementation() ); - if (!dst) { return MEMALLOC; } - if (auto err = dst->set_capacity(capacity)) - return err; - if (auto err = dst->set_max_depth(max_depth)) - return err; - return SUCCESS; -} +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif namespace { -using namespace simd; +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} -// This identifies structural characters (comma, colon, braces, brackets), -// and ASCII white-space ('\r','\n','\t',' '). -simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { - // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why - // we can't use the generic lookup_16. - const auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html - // The 6 operators (:,[]{}) have these values: + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. // - // , 2C - // : 3A - // [ 5B - // { 7B - // ] 5D - // } 7D + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. // - // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. - // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then - // match it (against | 0x20). + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 // - // To prevent recognizing other characters, everything else gets compared with 0, which cannot - // match due to the | 0x20. + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power // - // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , - // and :. This gets caught in stage 2, which checks the actual character to ensure the right - // operators are in the right places. - const auto op_table = simd8::repeat_16( - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B - ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D - ); - - // We compute whitespace and op separately. If later code only uses one or the - // other, given the fact that all functions are aggressively inlined, we can - // hope that useless computations will be omitted. This is namely case when - // minifying (we only need whitespace). - - const uint64_t whitespace = in.eq({ - _mm512_shuffle_epi8(whitespace_table, in.chunks[0]) - }); - // Turn [ and ] into { and } - const simd8x64 curlified{ - in.chunks[0] | 0x20 - }; - const uint64_t op = curlified.eq({ - _mm512_shuffle_epi8(op_table, in.chunks[0]) - }); - - return { whitespace, op }; -} - -simdjson_inline bool is_ascii(const simd8x64& input) { - return input.reduce_or().is_ascii(); -} - -simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 - simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. - return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); -} + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximately equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; -simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80 - simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80 - return is_third_byte | is_fourth_byte; -} -} // unnamed namespace -} // namespace icelake -} // namespace simdjson + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; -/** - * We provide a custom version of bit_indexer::write using - * naked intrinsics. - * TODO: make this code more elegant. - */ -// Under GCC 12, the intrinsic _mm512_extracti32x4_epi32 may generate 'maybe uninitialized'. -// as a workaround, we disable warnings within the following function. -SIMDJSON_PUSH_DISABLE_ALL_WARNINGS -namespace simdjson { namespace icelake { namespace { namespace stage1 { -simdjson_inline void bit_indexer::write(uint32_t idx, uint64_t bits) { - // In some instances, the next branch is expensive because it is mispredicted. - // Unfortunately, in other cases, - // it helps tremendously. - if (bits == 0) { return; } - const __m512i indexes = _mm512_maskz_compress_epi8(bits, _mm512_set_epi32( - 0x3f3e3d3c, 0x3b3a3938, 0x37363534, 0x33323130, - 0x2f2e2d2c, 0x2b2a2928, 0x27262524, 0x23222120, - 0x1f1e1d1c, 0x1b1a1918, 0x17161514, 0x13121110, - 0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100 - )); - const __m512i start_index = _mm512_set1_epi32(idx); + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. - const auto count = count_ones(bits); - __m512i t0 = _mm512_cvtepu8_epi32(_mm512_castsi512_si128(indexes)); - _mm512_storeu_si512(this->tail, _mm512_add_epi32(t0, start_index)); + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); - if(count > 16) { - const __m512i t1 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 1)); - _mm512_storeu_si512(this->tail + 16, _mm512_add_epi32(t1, start_index)); - if(count > 32) { - const __m512i t2 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 2)); - _mm512_storeu_si512(this->tail + 32, _mm512_add_epi32(t2, start_index)); - if(count > 48) { - const __m512i t3 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 3)); - _mm512_storeu_si512(this->tail + 48, _mm512_add_epi32(t3, start_index)); - } - } + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; } - this->tail += count; -} -}}}} -SIMDJSON_POP_DISABLE_WARNINGS - -// -// Stage 2 -// + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } -// -// Implementation-specific overrides -// -namespace simdjson { -namespace icelake { + mantissa += mantissa & 1; + mantissa >>= 1; -simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { - return icelake::stage1::json_minifier::minify<128>(buf, len, dst, dst_len); + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; } -simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { - this->buf = _buf; - this->len = _len; - return icelake::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming); -} +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. -simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return icelake::stage1::generic_validate_utf8(buf,len); + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } -simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. -simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } -simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { - return icelake::stringparsing::parse_string(src, dst, replacement_char); +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); } -simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { - return icelake::stringparsing::parse_wobbly_string(src, dst); +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; } -simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { - auto error = stage1(_buf, _len, stage1_mode::regular); - if (error) { return error; } - return stage2(_doc); +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; } -} // namespace icelake -} // namespace simdjson - -/* including simdjson/icelake/end.h: #include */ -/* begin file simdjson/icelake/end.h */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE -SIMDJSON_UNTARGET_REGION -#endif +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; -/* undefining SIMDJSON_IMPLEMENTATION from "icelake" */ -#undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/icelake/end.h */ +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} -#endif // SIMDJSON_SRC_ICELAKE_CPP -/* end file icelake.cpp */ -#endif -#if SIMDJSON_IMPLEMENTATION_PPC64 -/* including ppc64.cpp: #include */ -/* begin file ppc64.cpp */ -#ifndef SIMDJSON_SRC_PPC64_CPP -#define SIMDJSON_SRC_PPC64_CPP +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. -/* including simdjson/ppc64.h: #include */ -/* begin file simdjson/ppc64.h */ -#ifndef SIMDJSON_PPC64_H -#define SIMDJSON_PPC64_H + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. -/* including simdjson/ppc64/begin.h: #include "simdjson/ppc64/begin.h" */ -/* begin file simdjson/ppc64/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "ppc64" */ -#define SIMDJSON_IMPLEMENTATION ppc64 -/* including simdjson/ppc64/base.h: #include "simdjson/ppc64/base.h" */ -/* begin file simdjson/ppc64/base.h */ -#ifndef SIMDJSON_PPC64_BASE_H -#define SIMDJSON_PPC64_BASE_H + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} -namespace simdjson { -/** - * Implementation for ALTIVEC (PPC64). - */ -namespace ppc64 { +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} -class implementation; +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} -namespace { -namespace simd { -template struct simd8; -template struct simd8x64; -} // namespace simd } // unnamed namespace -} // namespace ppc64 -} // namespace simdjson - -#endif // SIMDJSON_PPC64_BASE_H -/* end file simdjson/ppc64/base.h */ -/* including simdjson/ppc64/intrinsics.h: #include "simdjson/ppc64/intrinsics.h" */ -/* begin file simdjson/ppc64/intrinsics.h */ -#ifndef SIMDJSON_PPC64_INTRINSICS_H -#define SIMDJSON_PPC64_INTRINSICS_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -// This should be the correct header whether -// you use visual studio or other compilers. -#include - -// These are defined by altivec.h in GCC toolchain, it is safe to undef them. -#ifdef bool -#undef bool -#endif - -#ifdef vector -#undef vector -#endif - -static_assert(sizeof(__vector unsigned char) <= simdjson::SIMDJSON_PADDING, "insufficient padding for ppc64"); - -#endif // SIMDJSON_PPC64_INTRINSICS_H -/* end file simdjson/ppc64/intrinsics.h */ -/* including simdjson/ppc64/bitmanipulation.h: #include "simdjson/ppc64/bitmanipulation.h" */ -/* begin file simdjson/ppc64/bitmanipulation.h */ -#ifndef SIMDJSON_PPC64_BITMANIPULATION_H -#define SIMDJSON_PPC64_BITMANIPULATION_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace ppc64 { -namespace { - -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -SIMDJSON_NO_SANITIZE_UNDEFINED -// This function can be used safely even if not all bytes have been -// initialized. -// See issue https://github.com/simdjson/simdjson/issues/1965 -SIMDJSON_NO_SANITIZE_MEMORY -simdjson_inline int trailing_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); } -/* result might be undefined when input_num is zero */ -simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return input_num & (input_num - 1); +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; } -/* result might be undefined when input_num is zero */ -simdjson_inline int leading_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO -} +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); -#if SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_inline int count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows in this kernel - return __popcnt64(input_num); // Visual Studio wants two underscores -} -#else -simdjson_inline int count_ones(uint64_t input_num) { - return __builtin_popcountll(input_num); +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds } -#endif -simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - *result = value1 + value2; - return *result < value1; +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } #else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); -#endif -} -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); -#endif // SIMDJSON_PPC64_BITMANIPULATION_H -/* end file simdjson/ppc64/bitmanipulation.h */ -/* including simdjson/ppc64/bitmask.h: #include "simdjson/ppc64/bitmask.h" */ -/* begin file simdjson/ppc64/bitmask.h */ -#ifndef SIMDJSON_PPC64_BITMASK_H -#define SIMDJSON_PPC64_BITMASK_H + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } -namespace simdjson { -namespace ppc64 { -namespace { + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } -// -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is -// encountered. -// -// For example, prefix_xor(00100100) == 00011100 -// -simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { - // You can use the version below, however gcc sometimes miscompiles - // vec_pmsum_be, it happens somewhere around between 8 and 9th version. - // The performance boost was not noticeable, falling back to a usual - // implementation. - // __vector unsigned long long all_ones = {~0ull, ~0ull}; - // __vector unsigned long long mask = {bitmask, 0}; - // // Clang and GCC return different values for pmsum for ull so cast it to one. - // // Generally it is not specified by ALTIVEC ISA what is returned by - // // vec_pmsum_be. - // #if defined(__LITTLE_ENDIAN__) - // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); - // #else - // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); - // #endif - bitmask ^= bitmask << 1; - bitmask ^= bitmask << 2; - bitmask ^= bitmask << 4; - bitmask ^= bitmask << 8; - bitmask ^= bitmask << 16; - bitmask ^= bitmask << 32; - return bitmask; + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; } -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson +// Inlineable functions +namespace { -#endif -/* end file simdjson/ppc64/bitmask.h */ -/* including simdjson/ppc64/numberparsing_defs.h: #include "simdjson/ppc64/numberparsing_defs.h" */ -/* begin file simdjson/ppc64/numberparsing_defs.h */ -#ifndef SIMDJSON_PPC64_NUMBERPARSING_DEFS_H -#define SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/intrinsics.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; -#include +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } -#if defined(__linux__) -#include -#elif defined(__FreeBSD__) -#include -#endif + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } -namespace simdjson { -namespace ppc64 { -namespace numberparsing { + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } -// we don't have appropriate instructions, so let us use a scalar function -// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ -/** @private */ -static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - uint64_t val; - std::memcpy(&val, chars, sizeof(uint64_t)); -#ifdef __BIG_ENDIAN__ -#if defined(__linux__) - val = bswap_64(val); -#elif defined(__FreeBSD__) - val = bswap64(val); -#endif -#endif - val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; - val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; - return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); + return i; } -/** @private */ -simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { - internal::value128 answer; -#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); -#endif - return answer; -} -} // namespace numberparsing -} // namespace ppc64 -} // namespace simdjson +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } -#define SIMDJSON_SWAR_NUMBER_PARSING 1 + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } -#endif // SIMDJSON_PPC64_NUMBERPARSING_DEFS_H -/* end file simdjson/ppc64/numberparsing_defs.h */ -/* including simdjson/ppc64/simd.h: #include "simdjson/ppc64/simd.h" */ -/* begin file simdjson/ppc64/simd.h */ -#ifndef SIMDJSON_PPC64_SIMD_H -#define SIMDJSON_PPC64_SIMD_H + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + return i; +} -#include +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } -namespace simdjson { -namespace ppc64 { -namespace { -namespace simd { + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } -using __m128i = __vector unsigned char; + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } -template struct base { - __m128i value; + return i; +} - // Zero constructor - simdjson_inline base() : value{__m128i()} {} +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); - // Conversion from SIMD register - simdjson_inline base(const __m128i _value) : value(_value) {} + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } - // Conversion to SIMD register - simdjson_inline operator const __m128i &() const { - return this->value; - } - simdjson_inline operator __m128i &() { return this->value; } + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} - // Bit operations - simdjson_inline Child operator|(const Child other) const { - return vec_or(this->value, (__m128i)other); - } - simdjson_inline Child operator&(const Child other) const { - return vec_and(this->value, (__m128i)other); - } - simdjson_inline Child operator^(const Child other) const { - return vec_xor(this->value, (__m128i)other); - } - simdjson_inline Child bit_andnot(const Child other) const { - return vec_andc(this->value, (__m128i)other); - } - simdjson_inline Child &operator|=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast | other; - return *this_cast; - } - simdjson_inline Child &operator&=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast & other; - return *this_cast; - } - simdjson_inline Child &operator^=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast ^ other; - return *this_cast; - } -}; +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); -template > -struct base8 : base> { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } - simdjson_inline base8() : base>() {} - simdjson_inline base8(const __m128i _value) : base>(_value) {} + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} - friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { - return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); - } +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; - static const int SIZE = sizeof(base>::value); + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } - template - simdjson_inline simd8 prev(simd8 prev_chunk) const { - __m128i chunk = this->value; -#ifdef __LITTLE_ENDIAN__ - chunk = (__m128i)vec_reve(this->value); - prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); -#endif - chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); -#ifdef __LITTLE_ENDIAN__ - chunk = (__m128i)vec_reve((__m128i)chunk); -#endif - return chunk; - } -}; + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd8 : base8 { - static simdjson_inline simd8 splat(bool _value) { - return (__m128i)vec_splats((unsigned char)(-(!!_value))); - } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m128i _value) - : base8(_value) {} - // Splat constructor - simdjson_inline simd8(bool _value) - : base8(splat(_value)) {} + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - simdjson_inline int to_bitmask() const { - __vector unsigned long long result; - const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, - 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); - result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, - (__m128i)perm_mask)); -#ifdef __LITTLE_ENDIAN__ - return static_cast(result[1]); -#else - return static_cast(result[0]); -#endif - } - simdjson_inline bool any() const { - return !vec_all_eq(this->value, (__m128i)vec_splats(0)); - } - simdjson_inline simd8 operator~() const { - return this->value ^ (__m128i)splat(true); + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; } -}; -template struct base8_numeric : base8 { - static simdjson_inline simd8 splat(T value) { - (void)value; - return (__m128i)vec_splats(value); - } - static simdjson_inline simd8 zero() { return splat(0); } - static simdjson_inline simd8 load(const T values[16]) { - return (__m128i)(vec_vsx_ld(0, reinterpret_cast(values))); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, - T v5, T v6, T v7, T v8, T v9, - T v10, T v11, T v12, T v13, - T v14, T v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, - v14, v15); - } + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; - simdjson_inline base8_numeric() : base8() {} - simdjson_inline base8_numeric(const __m128i _value) - : base8(_value) {} + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - // Store to array - simdjson_inline void store(T dst[16]) const { - vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); + exponent += exp_neg ? 0-exp : exp; } - // Override to distinguish from bool version - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { - return (__m128i)((__m128i)this->value + (__m128i)other); - } - simdjson_inline simd8 operator-(const simd8 other) const { - return (__m128i)((__m128i)this->value - (__m128i)other); - } - simdjson_inline simd8 &operator+=(const simd8 other) { - *this = *this + other; - return *static_cast *>(this); + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } } - simdjson_inline simd8 &operator-=(const simd8 other) { - *this = *this - other; - return *static_cast *>(this); + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; } + return d; +} - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior - // for out of range values) - template - simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); - } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted - // as a bitset). Passing a 0 value for mask would be equivalent to writing out - // every byte to output. Only the first 16 - count_ones(mask) bytes of the - // result are significant but 16 bytes get written. Design consideration: it - // seems like a function with the signature simd8 compress(uint32_t mask) - // would be sensible, but the AVX ISA makes this kind of approach difficult. - template - simdjson_inline void compress(uint16_t mask, L *output) const { - using internal::BitsSetTable256mul2; - using internal::pshufb_combine_table; - using internal::thintable_epi8; - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. -#ifdef __LITTLE_ENDIAN__ - __m128i shufmask = (__m128i)(__vector unsigned long long){ - thintable_epi8[mask1], thintable_epi8[mask2]}; -#else - __m128i shufmask = (__m128i)(__vector unsigned long long){ - thintable_epi8[mask2], thintable_epi8[mask1]}; - shufmask = (__m128i)vec_reve((__m128i)shufmask); -#endif - // we increment by 0x08 the second half of the mask - shufmask = ((__m128i)shufmask) + - ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} - // this is the version "nearly pruned" - __m128i pruned = vec_perm(this->value, this->value, shufmask); - // we still need to put the two halves together. - // we compute the popcount of the first half: - int pop1 = BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - __m128i compactmask = - vec_vsx_ld(0, reinterpret_cast(pshufb_combine_table + pop1 * 8)); - __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); - vec_vsx_st(answer, 0, reinterpret_cast<__m128i *>(output)); +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). + static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} - template - simdjson_inline simd8 - lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, - L replace5, L replace6, L replace7, L replace8, L replace9, - L replace10, L replace11, L replace12, L replace13, L replace14, - L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, replace4, replace5, replace6, - replace7, replace8, replace9, replace10, replace11, replace12, - replace13, replace14, replace15)); - } -}; +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); -// Signed bytes -template <> struct simd8 : base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) - : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const int8_t *values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, - int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) - : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10, v11, v12, v13, v14, - v15}) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 - repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, - int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - // Order-sensitive comparisons - simdjson_inline simd8 - max_val(const simd8 other) const { - return (__m128i)vec_max((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } - simdjson_inline simd8 - min_val(const simd8 other) const { - return (__m128i)vec_min((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } - simdjson_inline simd8 - operator>(const simd8 other) const { - return (__m128i)vec_cmpgt((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } - simdjson_inline simd8 - operator<(const simd8 other) const { - return (__m128i)vec_cmplt((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } -}; + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); -// Unsigned bytes -template <> struct simd8 : base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) - : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const uint8_t *values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) - : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15}) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 - repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, - uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, - uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, - uint8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; } - // Saturated math - simdjson_inline simd8 - saturating_add(const simd8 other) const { - return (__m128i)vec_adds(this->value, (__m128i)other); - } - simdjson_inline simd8 - saturating_sub(const simd8 other) const { - return (__m128i)vec_subs(this->value, (__m128i)other); - } + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; - // Order-specific operations - simdjson_inline simd8 - max_val(const simd8 other) const { - return (__m128i)vec_max(this->value, (__m128i)other); - } - simdjson_inline simd8 - min_val(const simd8 other) const { - return (__m128i)vec_min(this->value, (__m128i)other); - } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 - gt_bits(const simd8 other) const { - return this->saturating_sub(other); - } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 - lt_bits(const simd8 other) const { - return other.saturating_sub(*this); - } - simdjson_inline simd8 - operator<=(const simd8 other) const { - return other.max_val(*this) == other; - } - simdjson_inline simd8 - operator>=(const simd8 other) const { - return other.min_val(*this) == other; - } - simdjson_inline simd8 - operator>(const simd8 other) const { - return this->gt_bits(other).any_bits_set(); - } - simdjson_inline simd8 - operator<(const simd8 other) const { - return this->gt_bits(other).any_bits_set(); - } + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - // Bit-specific operations - simdjson_inline simd8 bits_not_set() const { - return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); - } - simdjson_inline simd8 bits_not_set(simd8 bits) const { - return (*this & bits).bits_not_set(); - } - simdjson_inline simd8 any_bits_set() const { - return ~this->bits_not_set(); - } - simdjson_inline simd8 any_bits_set(simd8 bits) const { - return ~this->bits_not_set(bits); - } - simdjson_inline bool bits_not_set_anywhere() const { - return vec_all_eq(this->value, (__m128i)vec_splats(0)); - } - simdjson_inline bool any_bits_set_anywhere() const { - return !bits_not_set_anywhere(); - } - simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { - return vec_all_eq(vec_and(this->value, (__m128i)bits), - (__m128i)vec_splats(0)); - } - simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { - return !bits_not_set_anywhere(bits); + exponent += exp_neg ? 0-exp : exp; } - template simdjson_inline simd8 shr() const { - return simd8( - (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } } - template simdjson_inline simd8 shl() const { - return simd8( - (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; } -}; + return d; +} -template struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, - "PPC64 kernel should use four registers per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; - simd8x64(const simd8x64 &o) = delete; // no copy allowed - simd8x64 & - operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, - const simd8 chunk2, const simd8 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_inline simd8x64(const T ptr[64]) - : chunks{simd8::load(ptr), simd8::load(ptr + 16), - simd8::load(ptr + 32), simd8::load(ptr + 48)} {} + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); - simdjson_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr + sizeof(simd8) * 0); - this->chunks[1].store(ptr + sizeof(simd8) * 1); - this->chunks[2].store(ptr + sizeof(simd8) * 2); - this->chunks[3].store(ptr + sizeof(simd8) * 3); + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; } - simdjson_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); - } + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; - simdjson_inline uint64_t compress(uint64_t mask, T *output) const { - this->chunks[0].compress(uint16_t(mask), output); - this->chunks[1].compress(uint16_t(mask >> 16), - output + 16 - count_ones(mask & 0xFFFF)); - this->chunks[2].compress(uint16_t(mask >> 32), - output + 32 - count_ones(mask & 0xFFFFFFFF)); - this->chunks[3].compress(uint16_t(mask >> 48), - output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); - return 64 - count_ones(mask); - } + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - simdjson_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r1 = this->chunks[1].to_bitmask(); - uint64_t r2 = this->chunks[2].to_bitmask(); - uint64_t r3 = this->chunks[3].to_bitmask(); - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + exponent += exp_neg ? 0-exp : exp; } - simdjson_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, - this->chunks[2] == mask, this->chunks[3] == mask) - .to_bitmask(); - } + if (*p != '"') { return NUMBER_ERROR; } - simdjson_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64(this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1], - this->chunks[2] == other.chunks[2], - this->chunks[3] == other.chunks[3]) - .to_bitmask(); - } + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - simdjson_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, - this->chunks[2] <= mask, this->chunks[3] <= mask) - .to_bitmask(); + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } } -}; // struct simd8x64 + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} -} // namespace simd } // unnamed namespace -} // namespace ppc64 +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_PPC64_SIMD_INPUT_H -/* end file simdjson/ppc64/simd.h */ -/* including simdjson/ppc64/stringparsing_defs.h: #include "simdjson/ppc64/stringparsing_defs.h" */ -/* begin file simdjson/ppc64/stringparsing_defs.h */ -#ifndef SIMDJSON_PPC64_STRINGPARSING_DEFS_H -#define SIMDJSON_PPC64_STRINGPARSING_DEFS_H +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for lsx */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for lsx: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/simd.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { -namespace { +namespace lsx { -using namespace simd; - -// Holds backslashes and quotes locations. -struct backslash_and_quote { -public: - static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote - copy_and_find(const uint8_t *src, uint8_t *dst); +// +// internal::implementation_simdjson_result_base inline implementation +// - simdjson_inline bool has_quote_first() { - return ((bs_bits - 1) & quote_bits) != 0; - } - simdjson_inline bool has_backslash() { return bs_bits != 0; } - simdjson_inline int quote_index() { - return trailing_zeroes(quote_bits); - } - simdjson_inline int backslash_index() { - return trailing_zeroes(bs_bits); +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; } +} - uint32_t bs_bits; - uint32_t quote_bits; -}; // struct backslash_and_quote +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} -simdjson_inline backslash_and_quote -backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // this can read up to 31 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), - "backslash and quote finder must process fewer than " - "SIMDJSON_PADDING bytes"); - simd8 v0(src); - simd8 v1(src + sizeof(v0)); - v0.store(dst); - v1.store(dst + sizeof(v0)); +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} - // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on - // PPC; therefore, we smash them together into a 64-byte mask and get the - // bitmask from there. - uint64_t bs_and_quote = - simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); - return { - uint32_t(bs_and_quote), // bs_bits - uint32_t(bs_and_quote >> 32) // quote_bits - }; +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; } -} // unnamed namespace -} // namespace ppc64 +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_PPC64_STRINGPARSING_DEFS_H -/* end file simdjson/ppc64/stringparsing_defs.h */ +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for lsx */ +/* end file simdjson/generic/amalgamated.h for lsx */ +/* including simdjson/lsx/end.h: #include "simdjson/lsx/end.h" */ +/* begin file simdjson/lsx/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 -/* end file simdjson/ppc64/begin.h */ -/* including simdjson/generic/amalgamated.h for ppc64: #include "simdjson/generic/amalgamated.h" */ -/* begin file simdjson/generic/amalgamated.h for ppc64 */ -#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) -#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! -#endif +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "lsx" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/lsx/end.h */ -/* including simdjson/generic/base.h for ppc64: #include "simdjson/generic/base.h" */ -/* begin file simdjson/generic/base.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_BASE_H +#endif // SIMDJSON_LSX_H +/* end file simdjson/lsx.h */ +/* including simdjson/lsx/implementation.h: #include */ +/* begin file simdjson/lsx/implementation.h */ +#ifndef SIMDJSON_LSX_IMPLEMENTATION_H +#define SIMDJSON_LSX_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ -/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ -/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ -/* amalgamation skipped (editor-only): #else */ -/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ -/* amalgamation skipped (editor-only): #endif */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { - -struct open_container; -class dom_parser_implementation; +namespace lsx { /** - * The type of a JSON number + * @private */ -enum class number_type { - floating_point_number=1, /// a binary64 number - signed_integer, /// a signed integer that fits in a 64-bit word using two's complement - unsigned_integer /// a positive integer larger or equal to 1<<63 +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("lsx", "LoongArch SX", internal::instruction_set::LSX) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; }; -} // namespace ppc64 +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_GENERIC_BASE_H -/* end file simdjson/generic/base.h for ppc64 */ -/* including simdjson/generic/jsoncharutils.h for ppc64: #include "simdjson/generic/jsoncharutils.h" */ -/* begin file simdjson/generic/jsoncharutils.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H +#endif // SIMDJSON_LSX_IMPLEMENTATION_H +/* end file simdjson/lsx/implementation.h */ + +/* including simdjson/lsx/begin.h: #include */ +/* begin file simdjson/lsx/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "lsx" */ +#define SIMDJSON_IMPLEMENTATION lsx +/* including simdjson/lsx/base.h: #include "simdjson/lsx/base.h" */ +/* begin file simdjson/lsx/base.h */ +#ifndef SIMDJSON_LSX_BASE_H +#define SIMDJSON_LSX_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +/** + * Implementation for LSX. + */ +namespace lsx { + +class implementation; + namespace { -namespace jsoncharutils { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace -// return non-zero if not a structural or whitespace char -// zero otherwise -simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace_negated[c]; +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_BASE_H +/* end file simdjson/lsx/base.h */ +/* including simdjson/lsx/intrinsics.h: #include "simdjson/lsx/intrinsics.h" */ +/* begin file simdjson/lsx/intrinsics.h */ +#ifndef SIMDJSON_LSX_INTRINSICS_H +#define SIMDJSON_LSX_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for LoongArch SX"); + +#endif // SIMDJSON_LSX_INTRINSICS_H +/* end file simdjson/lsx/intrinsics.h */ +/* including simdjson/lsx/bitmanipulation.h: #include "simdjson/lsx/bitmanipulation.h" */ +/* begin file simdjson/lsx/bitmanipulation.h */ +#ifndef SIMDJSON_LSX_BITMANIPULATION_H +#define SIMDJSON_LSX_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmask.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); } -simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace[c]; +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); } -// returns a value with the high 16 bits set if not valid -// otherwise returns the conversion of the 4 hex digits at src into the bottom -// 16 bits of the 32-bit return register -// -// see -// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ -static inline uint32_t hex_to_u32_nocheck( - const uint8_t *src) { // strictly speaking, static inline is a C-ism - uint32_t v1 = internal::digit_to_val32[630 + src[0]]; - uint32_t v2 = internal::digit_to_val32[420 + src[1]]; - uint32_t v3 = internal::digit_to_val32[210 + src[2]]; - uint32_t v4 = internal::digit_to_val32[0 + src[3]]; - return v1 | v2 | v3 | v4; +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return __builtin_clzll(input_num); } -// given a code point cp, writes to c -// the utf-8 code, outputting the length in -// bytes, if the length is zero, the code point -// is invalid +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__m128i(v2u64{input_num, 0})), 0); +} + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_BITMANIPULATION_H +/* end file simdjson/lsx/bitmanipulation.h */ +/* including simdjson/lsx/bitmask.h: #include "simdjson/lsx/bitmask.h" */ +/* begin file simdjson/lsx/bitmask.h */ +#ifndef SIMDJSON_LSX_BITMASK_H +#define SIMDJSON_LSX_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { + // -// This can possibly be made faster using pdep -// and clz and table lookups, but JSON documents -// have few escaped code points, and the following -// function looks cheap. +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. // -// Note: we assume that surrogates are treated separately +// For example, prefix_xor(00100100) == 00011100 // -simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { - if (cp <= 0x7F) { - c[0] = uint8_t(cp); - return 1; // ascii - } - if (cp <= 0x7FF) { - c[0] = uint8_t((cp >> 6) + 192); - c[1] = uint8_t((cp & 63) + 128); - return 2; // universal plane - // Surrogates are treated elsewhere... - //} //else if (0xd800 <= cp && cp <= 0xdfff) { - // return 0; // surrogates // could put assert here - } else if (cp <= 0xFFFF) { - c[0] = uint8_t((cp >> 12) + 224); - c[1] = uint8_t(((cp >> 6) & 63) + 128); - c[2] = uint8_t((cp & 63) + 128); - return 3; - } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed - c[0] = uint8_t((cp >> 18) + 240); - c[1] = uint8_t(((cp >> 12) & 63) + 128); - c[2] = uint8_t(((cp >> 6) & 63) + 128); - c[3] = uint8_t((cp & 63) + 128); - return 4; - } - // will return 0 when the code point was too large. - return 0; // bad r +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif +/* end file simdjson/lsx/bitmask.h */ +/* including simdjson/lsx/numberparsing_defs.h: #include "simdjson/lsx/numberparsing_defs.h" */ +/* begin file simdjson/lsx/numberparsing_defs.h */ +#ifndef SIMDJSON_LSX_NUMBERPARSING_DEFS_H +#define SIMDJSON_LSX_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lsx { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); } -#if SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); + return answer; } -#endif -} // namespace jsoncharutils -} // unnamed namespace -} // namespace ppc64 +} // namespace numberparsing +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H -/* end file simdjson/generic/jsoncharutils.h for ppc64 */ -/* including simdjson/generic/atomparsing.h for ppc64: #include "simdjson/generic/atomparsing.h" */ -/* begin file simdjson/generic/atomparsing.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_ATOMPARSING_H +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 +#else +#define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif +#endif + +#endif // SIMDJSON_LSX_NUMBERPARSING_DEFS_H +/* end file simdjson/lsx/numberparsing_defs.h */ +/* including simdjson/lsx/simd.h: #include "simdjson/lsx/simd.h" */ +/* begin file simdjson/lsx/simd.h */ +#ifndef SIMDJSON_LSX_SIMD_H +#define SIMDJSON_LSX_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include - namespace simdjson { -namespace ppc64 { +namespace lsx { namespace { -/// @private -namespace atomparsing { +namespace simd { -// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. -// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot -// be certain that the character pointer will be properly aligned. -// You might think that using memcpy makes this function expensive, but you'd be wrong. -// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); -// to the compile-time constant 1936482662. -simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m128i value; + // Zero constructor + simdjson_inline base() : value{__m128i()} {} -// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. -// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. -simdjson_warn_unused -simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { - uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) - static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); - std::memcpy(&srcval, src, sizeof(uint32_t)); - return srcval ^ string_to_uint32(atom); -} + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src) { - return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + simdjson_inline operator const v16i8&() const { return (v16i8&)this->value; } + simdjson_inline operator v16i8&() { return (v16i8&)this->value; } -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_true_atom(src); } - else if (len == 4) { return !str4ncmp(src, "true"); } - else { return false; } -} + // Bit operations + simdjson_inline Child operator|(const Child other) const { return __lsx_vor_v(*this, other); } + simdjson_inline Child operator&(const Child other) const { return __lsx_vand_v(*this, other); } + simdjson_inline Child operator^(const Child other) const { return __lsx_vxor_v(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return __lsx_vandn_v(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src) { - return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; -} + // Forward-declared so they can be used by splat and friends. + template + struct simd8; -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { - if (len > 5) { return is_valid_false_atom(src); } - else if (len == 5) { return !str4ncmp(src+1, "alse"); } - else { return false; } -} + template> + struct base8: base> { + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src) { - return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return __lsx_vseq_b(lhs, rhs); } -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_null_atom(src); } - else if (len == 4) { return !str4ncmp(src, "null"); } - else { return false; } -} + static const int SIZE = sizeof(base>::value); -} // namespace atomparsing -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(*this, N), __lsx_vbsrl_v(prev_chunk, 16 - N)); + } + }; -#endif // SIMDJSON_GENERIC_ATOMPARSING_H -/* end file simdjson/generic/atomparsing.h for ppc64 */ -/* including simdjson/generic/dom_parser_implementation.h for ppc64: #include "simdjson/generic/dom_parser_implementation.h" */ -/* begin file simdjson/generic/dom_parser_implementation.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { + return __lsx_vreplgr2vr_b(uint8_t(-(!!_value))); + } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} -namespace simdjson { -namespace ppc64 { + simdjson_inline int to_bitmask() const { return __lsx_vpickve2gr_w(__lsx_vmskltz_b(*this), 0); } + simdjson_inline bool any() const { return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; -// expectation: sizeof(open_container) = 64/8. -struct open_container { - uint32_t tape_index; // where, on the tape, does the scope ([,{) begins - uint32_t count; // how many elements in the scope -}; // struct open_container + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return __lsx_vreplgr2vr_b(_value); } + static simdjson_inline simd8 zero() { return __lsx_vldi(0); } + static simdjson_inline simd8 load(const T values[16]) { + return __lsx_vld(reinterpret_cast(values), 0); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - /** Tape location of each open { or [ */ - std::unique_ptr open_containers{}; - /** Whether each open container is a [ or { */ - std::unique_ptr is_array{}; - /** Buffer passed to stage 1 */ - const uint8_t *buf{}; - /** Length passed to stage 1 */ - size_t len{0}; - /** Document passed to stage 2 */ - dom::document *doc{}; + // Store to array + simdjson_inline void store(T dst[16]) const { + return __lsx_vst(*this, reinterpret_cast<__m128i *>(dst), 0); + } - inline dom_parser_implementation() noexcept; - inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; - inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return __lsx_vadd_b(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return __lsx_vsub_b(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } - simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; - simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; - simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; - simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; - inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; - inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; -private: - simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } -}; + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return __lsx_vshuf_b(lookup_table, lookup_table, *this); + } -} // namespace ppc64 -} // namespace simdjson + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by haswell + // lsx do it in 2 steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register. + __m128i shufmask = {int64_t(thintable_epi8[mask1]), int64_t(thintable_epi8[mask2]) + 0x0808080808080808}; + // this is the version "nearly pruned" + __m128i pruned = __lsx_vshuf_b(*this, *this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask + __m128i compactmask = __lsx_vldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop1 * 8); + __m128i answer = __lsx_vshuf_b(pruned, pruned, compactmask); + __lsx_vst(answer, reinterpret_cast(output), 0); + } -namespace simdjson { -namespace ppc64 { + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; -inline dom_parser_implementation::dom_parser_implementation() noexcept = default; -inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; -inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[16]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8({ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -// Leaving these here so they can be inlined if so desired -inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } - // Stage 1 index output - size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; - structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); - if (!structural_indexes) { _capacity = 0; return MEMALLOC; } - structural_indexes[0] = 0; - n_structural_indexes = 0; + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return __lsx_vmax_b(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lsx_vmin_b(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return __lsx_vslt_b(other, *this); } + simdjson_inline simd8 operator<(const simd8 other) const { return __lsx_vslt_b(*this, other); } + }; - _capacity = capacity; - return SUCCESS; -} + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(__m128i(v16u8{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + })) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - // Stage 2 stacks - open_containers.reset(new (std::nothrow) open_container[max_depth]); - is_array.reset(new (std::nothrow) bool[max_depth]); - if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return __lsx_vsadd_bu(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return __lsx_vssub_bu(*this, other); } - _max_depth = max_depth; - return SUCCESS; -} + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return __lsx_vmax_bu(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lsx_vmin_bu(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } -} // namespace ppc64 -} // namespace simdjson + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return 0 == __lsx_vpickve2gr_w(__lsx_vmskltz_b(*this), 0); } + simdjson_inline bool bits_not_set_anywhere() const { return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(__lsx_vand_v(*this, bits)), 0); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(__lsx_vsrli_b(*this, N)); } + template + simdjson_inline simd8 shl() const { return simd8(__lsx_vslli_b(*this, N)); } + }; -#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H -/* end file simdjson/generic/dom_parser_implementation.h for ppc64 */ -/* including simdjson/generic/implementation_simdjson_result_base.h for ppc64: #include "simdjson/generic/implementation_simdjson_result_base.h" */ -/* begin file simdjson/generic/implementation_simdjson_result_base.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "LSX kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed -namespace simdjson { -namespace ppc64 { + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} -// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair -// so we can avoid inlining errors -// TODO reconcile these! -/** - * The result of a simdjson operation that could fail. - * - * Gives the option of reading error codes, or throwing an exception by casting to the desired result. - * - * This is a base class for implementations that want to add functions to the result type for - * chaining. - * - * Override like: - * - * struct simdjson_result : public internal::implementation_simdjson_result_base { - * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} - * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} - * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} - * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} - * // Your extra methods here - * } - * - * Then any method returning simdjson_result will be chainable with your methods. - */ -template -struct implementation_simdjson_result_base { + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint16_t mask1 = uint16_t(mask); + uint16_t mask2 = uint16_t(mask >> 16); + uint16_t mask3 = uint16_t(mask >> 32); + uint16_t mask4 = uint16_t(mask >> 48); + __m128i zcnt = __lsx_vpcnt_h(__m128i(v2u64{~mask, 0})); + uint64_t zcnt1 = __lsx_vpickve2gr_hu(zcnt, 0); + uint64_t zcnt2 = __lsx_vpickve2gr_hu(zcnt, 1); + uint64_t zcnt3 = __lsx_vpickve2gr_hu(zcnt, 2); + uint64_t zcnt4 = __lsx_vpickve2gr_hu(zcnt, 3); + uint8_t *voutput = reinterpret_cast(output); + // There should be a critical value which processes in scaler is faster. + if (zcnt1) + this->chunks[0].compress(mask1, reinterpret_cast(voutput)); + voutput += zcnt1; + if (zcnt2) + this->chunks[1].compress(mask2, reinterpret_cast(voutput)); + voutput += zcnt2; + if (zcnt3) + this->chunks[2].compress(mask3, reinterpret_cast(voutput)); + voutput += zcnt3; + if (zcnt4) + this->chunks[3].compress(mask4, reinterpret_cast(voutput)); + voutput += zcnt4; + return reinterpret_cast(voutput) - reinterpret_cast(output); + } - /** - * Create a new empty result with error = UNINITIALIZED. - */ - simdjson_inline implementation_simdjson_result_base() noexcept = default; + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } - /** - * Create a new error result. - */ - simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + simdjson_inline uint64_t to_bitmask() const { + __m128i mask1 = __lsx_vmskltz_b(this->chunks[0]); + __m128i mask2 = __lsx_vmskltz_b(this->chunks[1]); + __m128i mask3 = __lsx_vmskltz_b(this->chunks[2]); + __m128i mask4 = __lsx_vmskltz_b(this->chunks[3]); + mask1 = __lsx_vilvl_h(mask2, mask1); + mask2 = __lsx_vilvl_h(mask4, mask3); + return __lsx_vpickve2gr_du(__lsx_vilvl_w(mask2, mask1), 0); + } - /** - * Create a new successful result. - */ - simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } - /** - * Create a new result with both things (use if you don't want to branch when creating the result). - */ - simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } - /** - * Move the value and the error to the provided variables. - * - * @param value The variable to assign the value to. May not be set if there is an error. - * @param error The variable to assign the error to. Set to SUCCESS if there is no error. - */ - simdjson_inline void tie(T &value, error_code &error) && noexcept; + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } - /** - * Move the value to the provided variable. - * - * @param value The variable to assign the value to. May not be set if there is an error. - */ - simdjson_inline error_code get(T &value) && noexcept; + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 - /** - * The error. - */ - simdjson_inline error_code error() const noexcept; +} // namespace simd +} // unnamed namespace +} // namespace lsx +} // namespace simdjson -#if SIMDJSON_EXCEPTIONS +#endif // SIMDJSON_LSX_SIMD_H +/* end file simdjson/lsx/simd.h */ +/* including simdjson/lsx/stringparsing_defs.h: #include "simdjson/lsx/stringparsing_defs.h" */ +/* begin file simdjson/lsx/stringparsing_defs.h */ +#ifndef SIMDJSON_LSX_STRINGPARSING_DEFS_H +#define SIMDJSON_LSX_STRINGPARSING_DEFS_H - /** - * Get the result value. - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T& value() & noexcept(false); +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T&& value() && noexcept(false); +namespace simdjson { +namespace lsx { +namespace { - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T&& take_value() && noexcept(false); +using namespace simd; - /** - * Cast to the value (will throw on error). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline operator T&&() && noexcept(false); +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote -#endif // SIMDJSON_EXCEPTIONS +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline const T& value_unsafe() const& noexcept; - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline T& value_unsafe() & noexcept; - /** - * Take the result value (move it). This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline T&& value_unsafe() && noexcept; -protected: - /** users should never directly access first and second. **/ - T first{}; /** Users should never directly access 'first'. **/ - error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ -}; // struct implementation_simdjson_result_base + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on LSX; therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} -} // namespace ppc64 +} // unnamed namespace +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H -/* end file simdjson/generic/implementation_simdjson_result_base.h for ppc64 */ -/* including simdjson/generic/numberparsing.h for ppc64: #include "simdjson/generic/numberparsing.h" */ -/* begin file simdjson/generic/numberparsing.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H +#endif // SIMDJSON_LSX_STRINGPARSING_DEFS_H +/* end file simdjson/lsx/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/lsx/begin.h */ +/* including generic/amalgamated.h for lsx: #include */ +/* begin file generic/amalgamated.h for lsx */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) +#error generic/dependencies.h must be included before generic/amalgamated.h! +#endif + +/* including generic/base.h for lsx: #include */ +/* begin file generic/base.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include -#include -#include - namespace simdjson { -namespace ppc64 { -namespace numberparsing { - -#ifdef JSON_TEST_NUMBERS -#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) -#else -#define INVALID_NUMBER(SRC) (NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) -#endif - +namespace lsx { namespace { -// Convert a mantissa, an exponent and a sign bit into an ieee64 double. -// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). -// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. -simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { - double d; - mantissa &= ~(1ULL << 52); - mantissa |= real_exponent << 52; - mantissa |= ((static_cast(negative)) << 63); - std::memcpy(&d, &mantissa, sizeof(d)); - return d; -} +struct json_character_block; -// Attempts to compute i * 10^(power) exactly; and if "negative" is -// true, negate the result. -// This function will only work in some cases, when it does not work, success is -// set to false. This should work *most of the time* (like 99% of the time). -// We assume that power is in the [smallest_power, -// largest_power] interval: the caller is responsible for this check. -simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { - // we start with a fast path - // It was described in - // Clinger WD. How to read floating point numbers accurately. - // ACM SIGPLAN Notices. 1990 -#ifndef FLT_EVAL_METHOD -#error "FLT_EVAL_METHOD should be defined, please include cfloat." -#endif -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - // We cannot be certain that x/y is rounded to nearest. - if (0 <= power && power <= 22 && i <= 9007199254740991) -#else - if (-22 <= power && power <= 22 && i <= 9007199254740991) -#endif - { - // convert the integer into a double. This is lossless since - // 0 <= i <= 2^53 - 1. - d = double(i); - // - // The general idea is as follows. - // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then - // 1) Both s and p can be represented exactly as 64-bit floating-point - // values - // (binary64). - // 2) Because s and p can be represented exactly as floating-point values, - // then s * p - // and s / p will produce correctly rounded values. - // - if (power < 0) { - d = d / simdjson::internal::power_of_ten[-power]; - } else { - d = d * simdjson::internal::power_of_ten[power]; - } - if (negative) { - d = -d; - } - return true; - } - // When 22 < power && power < 22 + 16, we could - // hope for another, secondary fast path. It was - // described by David M. Gay in "Correctly rounded - // binary-decimal and decimal-binary conversions." (1990) - // If you need to compute i * 10^(22 + x) for x < 16, - // first compute i * 10^x, if you know that result is exact - // (e.g., when i * 10^x < 2^53), - // then you can still proceed and do (i * 10^x) * 10^22. - // Is this worth your time? - // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) - // for this second fast path to work. - // If you you have 22 < power *and* power < 22 + 16, and then you - // optimistically compute "i * 10^(x-22)", there is still a chance that you - // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of - // this optimization maybe less common than we would like. Source: - // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ - // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html +} // unnamed namespace +} // namespace lsx +} // namespace simdjson - // The fast path has now failed, so we are failing back on the slower path. +#endif // SIMDJSON_SRC_GENERIC_BASE_H +/* end file generic/base.h for lsx */ +/* including generic/dom_parser_implementation.h for lsx: #include */ +/* begin file generic/dom_parser_implementation.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H - // In the slow path, we need to adjust i so that it is > 1<<63 which is always - // possible, except if i == 0, so we handle i == 0 separately. - if(i == 0) { - d = negative ? -0.0 : 0.0; - return true; - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// Interface a dom parser implementation must fulfill +namespace simdjson { +namespace lsx { +namespace { - // The exponent is 1024 + 63 + power - // + floor(log(5**power)/log(2)). - // The 1024 comes from the ieee64 standard. - // The 63 comes from the fact that we use a 64-bit word. - // - // Computing floor(log(5**power)/log(2)) could be - // slow. Instead we use a fast function. - // - // For power in (-400,350), we have that - // (((152170 + 65536) * power ) >> 16); - // is equal to - // floor(log(5**power)/log(2)) + power when power >= 0 - // and it is equal to - // ceil(log(5**-power)/log(2)) + power when power < 0 - // - // The 65536 is (1<<16) and corresponds to - // (65536 * power) >> 16 ---> power - // - // ((152170 * power ) >> 16) is equal to - // floor(log(5**power)/log(2)) - // - // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). - // The 1<<16 value is a power of two; we could use a - // larger power of 2 if we wanted to. - // - int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); +simdjson_inline bool is_ascii(const simd8x64& input); +} // unnamed namespace +} // namespace lsx +} // namespace simdjson - // We want the most significant bit of i to be 1. Shift if needed. - int lz = leading_zeroes(i); - i <<= lz; +#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file generic/dom_parser_implementation.h for lsx */ +/* including generic/json_character_block.h for lsx: #include */ +/* begin file generic/json_character_block.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // We are going to need to do some 64-bit arithmetic to get a precise product. - // We use a table lookup approach. - // It is safe because - // power >= smallest_power - // and power <= largest_power - // We recover the mantissa of the power, it has a leading 1. It is always - // rounded down. - // - // We want the most significant 64 bits of the product. We know - // this will be non-zero because the most significant bit of i is - // 1. - const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); - // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); - // Both i and power_of_five_128[index] have their most significant bit set to 1 which - // implies that the either the most or the second most significant bit of the product - // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason about the product: there - // is 0 or 1 leading zero in the product. +namespace simdjson { +namespace lsx { +namespace { - // Unless the least significant 9 bits of the high (64-bit) part of the full - // product are all 1s, then we know that the most significant 55 bits are - // exact and no further work is needed. Having 55 bits is necessary because - // we need 53 bits for the mantissa but we have to have one rounding bit and - // we can waste a bit if the most significant bit of the product is zero. - if((firstproduct.high & 0x1FF) == 0x1FF) { - // We want to compute i * 5^q, but only care about the top 55 bits at most. - // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing - // the full computation is wasteful. So we do what is called a "truncated - // multiplication". - // We take the most significant 64-bits, and we put them in - // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q - // to the desired approximation using one multiplication. Sometimes it does not suffice. - // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and - // then we get a better approximation to i * 5^q. - // - // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat - // more complicated. - // - // There is an extra layer of complexity in that we need more than 55 bits of - // accuracy in the round-to-even scenario. - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); - firstproduct.low += secondproduct.high; - if(secondproduct.high > firstproduct.low) { firstproduct.high++; } - // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without - // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product - // is sufficiently accurate, and more computation is not needed. - } - uint64_t lower = firstproduct.low; - uint64_t upper = firstproduct.high; - // The final mantissa should be 53 bits with a leading 1. - // We shift it so that it occupies 54 bits with a leading 1. - /////// - uint64_t upperbit = upper >> 63; - uint64_t mantissa = upper >> (upperbit + 9); - lz += int(1 ^ upperbit); +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); - // Here we have mantissa < (1<<54). - int64_t real_exponent = exponent - lz; - if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 - if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = negative ? -0.0 : 0.0; - return true; - } - // next line is safe because -real_exponent + 1 < 0 - mantissa >>= -real_exponent + 1; - // Thankfully, we can't have both "round-to-even" and subnormals because - // "round-to-even" only occurs for powers close to 0. - mantissa += (mantissa & 1); // round up - mantissa >>= 1; - // There is a weird scenario where we don't have a subnormal but just. - // Suppose we start with 2.2250738585072013e-308, we end up - // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal - // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round - // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer - // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. - real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; - d = to_double(mantissa, real_exponent, negative); - return true; - } - // We have to round to even. The "to even" part - // is only a problem when we are right in between two floats - // which we guard against. - // If we have lots of trailing zeros, we may fall right between two - // floating-point values. - // - // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] - // times a power of two. That is, it is right between a number with binary significand - // m and another number with binary significand m+1; and it must be the case - // that it cannot be represented by a float itself. - // - // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. - // Recall that 10^q = 5^q * 2^q. - // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that - // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. - // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so - // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have - // 2^{53} x 5^{-q} < 2^{64}. - // Hence we have 5^{-q} < 2^{11}$ or q>= -4. - // - // We require lower <= 1 and not lower == 0 because we could not prove that - // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. - if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { - if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { - mantissa &= ~1; // flip it so that we do not round up - } - } + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } - mantissa += mantissa & 1; - mantissa >>= 1; + uint64_t _whitespace; + uint64_t _op; +}; - // Here we have mantissa < (1<<53), unless there was an overflow - if (mantissa >= (1ULL << 53)) { - ////////// - // This will happen when parsing values such as 7.2057594037927933e+16 - //////// - mantissa = (1ULL << 52); - real_exponent++; - } - mantissa &= ~(1ULL << 52); - // we have to check that real_exponent is in range, otherwise we bail out - if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinite value!!! We could actually throw an error here if we could. - return false; - } - d = to_double(mantissa, real_exponent, negative); - return true; -} +} // unnamed namespace +} // namespace lsx +} // namespace simdjson -// We call a fallback floating-point parser that might be slow. Note -// it will accept JSON numbers, but the JSON spec. is more restrictive so -// before you call parse_float_fallback, you need to have validated the input -// string with the JSON grammar. -// It will return an error (false) if the parsed number is infinite. -// The string parsing itself always succeeds. We know that there is at least -// one digit. -static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); - // We do not accept infinite values. +#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +/* end file generic/json_character_block.h for lsx */ +/* end file generic/amalgamated.h for lsx */ +/* including generic/stage1/amalgamated.h for lsx: #include */ +/* begin file generic/stage1/amalgamated.h for lsx */ +// Stuff other things depend on +/* including generic/stage1/base.h for lsx: #include */ +/* begin file generic/stage1/base.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); - // We do not accept infinite values. +namespace simdjson { +namespace lsx { +namespace { +namespace stage1 { - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} +class bit_indexer; +template +struct buf_block_reader; +struct json_block; +class json_minifier; +class json_scanner; +struct json_string_block; +class json_string_scanner; +class json_structural_indexer; -// check quickly whether the next 8 chars are made of digits -// at a glance, it looks better than Mula's -// http://0x80.pl/articles/swar-digits-validate.html -simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { - uint64_t val; - // this can read up to 7 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); - std::memcpy(&val, chars, 8); - // a branchy method might be faster: - // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) - // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == - // 0x3030303030303030); - return (((val & 0xF0F0F0F0F0F0F0F0) | - (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == - 0x3333333333333333); -} +} // namespace stage1 -template -SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_inline bool parse_digit(const uint8_t c, I &i) { - const uint8_t digit = static_cast(c - '0'); - if (digit > 9) { - return false; - } - // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication - i = 10 * i + digit; // might overflow, we will handle the overflow later - return true; -} +namespace utf8_validation { +struct utf8_checker; +} // namespace utf8_validation -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. - const uint8_t *const first_after_period = p; +using utf8_validation::utf8_checker; -#ifdef SIMDJSON_SWAR_NUMBER_PARSING -#if SIMDJSON_SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif // SIMDJSON_SWAR_NUMBER_PARSING -#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING - // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) - if (parse_digit(*p, i)) { ++p; } - while (parse_digit(*p, i)) { p++; } - exponent = first_after_period - p; - // Decimal without digits (123.) is illegal - if (exponent == 0) { - return INVALID_NUMBER(src); - } - return SUCCESS; -} +} // unnamed namespace +} // namespace lsx +} // namespace simdjson -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { - // Exp Sign: -123.456e[-]78 - bool neg_exp = ('-' == *p); - if (neg_exp || '+' == *p) { p++; } // Skip + as well +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H +/* end file generic/stage1/base.h for lsx */ +/* including generic/stage1/buf_block_reader.h for lsx: #include */ +/* begin file generic/stage1/buf_block_reader.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H - // Exponent: -123.456e-[78] - auto start_exp = p; - int64_t exp_number = 0; - while (parse_digit(*p, exp_number)) { ++p; } - // It is possible for parse_digit to overflow. - // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. - // Thus we *must* check for possible overflow before we negate exp_number. +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into - // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may - // not oblige and may, in fact, generate two distinct paths in any case. It might be - // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off - // instructions for a simdjson_likely branch, an unconclusive gain. +#include - // If there were no digits, it's an error. - if (simdjson_unlikely(p == start_exp)) { - return INVALID_NUMBER(src); - } - // We have a valid positive exponent in exp_number at this point, except that - // it may have overflowed. +namespace simdjson { +namespace lsx { +namespace { +namespace stage1 { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; - // If there were more than 18 digits, we may have overflowed the integer. We have to do - // something!!!! - if (simdjson_unlikely(p > start_exp+18)) { - // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow - while (*start_exp == '0') { start_exp++; } - // 19 digits could overflow int64_t and is kind of absurd anyway. We don't - // support exponents smaller than -999,999,999,999,999,999 and bigger - // than 999,999,999,999,999,999. - // We can truncate. - // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before - // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could - // truncate at 324. - // Note that there is no reason to fail per se at this point in time. - // E.g., 0e999999999999999999999 is a fine number. - if (p > start_exp+18) { exp_number = 999999999999999999; } +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); } - // At this point, we know that exp_number is a sane, positive, signed integer. - // It is <= 999,999,999,999,999,999. As long as 'exponent' is in - // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' - // is bounded in magnitude by the size of the JSON input, we are fine in this universe. - // To sum it up: the next line should never overflow. - exponent += (neg_exp ? -exp_number : exp_number); - return SUCCESS; + buf[sizeof(simd8x64)] = '\0'; + return buf; } -simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { ++start; } - // we over-decrement by one when there is a '.' - return digit_count - size_t(start - start_digits); +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; } -} // unnamed namespace - -/** @private */ -static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { - if (parse_float_fallback(src, answer)) { - return SUCCESS; +simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] <= ' ') { buf[i] = '_'; } + if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } } - return INVALID_NUMBER(src); + buf[sizeof(simd8x64)] = '\0'; + return buf; } -/** @private */ -template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon in practice. - // - // 9999999999999999999 < 2**64 so we can accommodate 19 digits. - // If we have a decimal separator, then digit_count - 1 is the number of digits, but we - // may not have a decimal separator! - if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { - // Ok, chances are good that we had an overflow! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer - // reference to it, it would force it to be stored in memory, preventing the compiler from - // picking it apart and putting into registers. i.e. if we pass it as reference, - // it gets slow. - double d; - error_code error = slow_float_parsing(src, &d); - writer.append_double(d); - return error; - } - // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other - // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 - // To future reader: we'd love if someone found a better way, or at least could explain this result! - if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { - // - // Important: smallest_power is such that it leads to a zero value. - // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero - // so something x 10^-343 goes to zero, but not so with something x 10^-342. - static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); - // - if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero - WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); - return SUCCESS; - } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. - return INVALID_NUMBER(src); - } - } - double d; - if (!compute_float_64(exponent, i, negative, d)) { - // we are almost never going to get here. - if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; } - WRITE_DOUBLE(d, src, writer); - return SUCCESS; + buf[64] = '\0'; + return buf; } -// for performance analysis, it is sometimes useful to skip parsing -#ifdef SIMDJSON_SKIPNUMBERPARSING - -template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { - writer.append_s64(0); // always write zero - return SUCCESS; // always succeeds -} +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } -#else +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } -// parse the number at src -// define JSON_TEST_NUMBERS for unit testing -// -// It is assumed that the number is followed by a structural ({,},],[) character -// or a white space character. If that is not the case (e.g., when the JSON -// document is made of a single number), then it is necessary to copy the -// content and append a space before calling this function. -// -// Our objective is accurate parsing (ULP of 0) at high speed. -template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} - // - // Handle floats if there is a . or e (or both) - // - int64_t exponent = 0; - bool is_float = false; - if ('.' == *p) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); - digit_count = int(p - start_digits); // used later to guard against overflows - } - if (('e' == *p) || ('E' == *p)) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_exponent(src, p, exponent) ); - } - if (is_float) { - const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); - SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); - if (dirty_end) { return INVALID_NUMBER(src); } - return SUCCESS; - } +} // namespace stage1 +} // unnamed namespace +} // namespace lsx +} // namespace simdjson - // The longest negative 64-bit number is 19 digits. - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - size_t longest_digit_count = negative ? 19 : 20; - if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } - WRITE_INTEGER(~i+1, src, writer); - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } - } +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +/* end file generic/stage1/buf_block_reader.h for lsx */ +/* including generic/stage1/json_escape_scanner.h for lsx: #include */ +/* begin file generic/stage1/json_escape_scanner.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H - // Write unsigned if it doesn't fit in a signed integer. - if (i > uint64_t(INT64_MAX)) { - WRITE_UNSIGNED(i, src, writer); - } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); - } - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// Inlineable functions +namespace simdjson { +namespace lsx { namespace { +namespace stage1 { -// This table can be used to characterize the final character of an integer -// string. For JSON structural character and allowable white space characters, -// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise -// we return NUMBER_ERROR. -// Optimization note: we could easily reduce the size of the table by half (to 128) -// at the cost of an extra branch. -// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): -static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); +/** + * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). + */ +struct json_escape_scanner { + /** The actual escape characters (the backslashes themselves). */ + uint64_t next_is_escaped = 0ULL; -const uint8_t integer_string_finisher[256] = { - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR}; + struct escaped_and_escape { + /** + * Mask of escaped characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 0100100010100101000 + * n \ \ n \ \ + * ``` + */ + uint64_t escaped; + /** + * Mask of escape characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 1001000101001010001 + * \ \ \ \ \ \ \ + * ``` + */ + uint64_t escape; + }; -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + /** + * Get a mask of both escape and escaped characters (the characters following a backslash). + * + * @param potential_escape A mask of the character that can escape others (but could be + * escaped itself). e.g. block.eq('\\') + */ + simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } +#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT + if (!backslash) { return {next_escaped_without_backslashes(), 0}; } +#endif - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + // | | Mask (shows characters instead of 1's) | Depth | Instructions | + // |--------------------------------|----------------------------------------|-------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | + // | | ` even odd even odd odd` | | | + // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) + // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) + // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) + // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () + // (*) this is not needed until the next iteration + uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); + uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); + uint64_t escape = escape_and_terminal_code & backslash; + this->next_is_escaped = escape >> 63; + return {escaped, escape}; } - return i; -} - - -// Parse any number from 0 to 18,446,744,073,709,551,615 -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } +private: + static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { + uint64_t escaped = this->next_is_escaped; + this->next_is_escaped = 0; + return escaped; } - return i; -} + /** + * Returns a mask of the next escape characters (masking out escaped backslashes), along with + * any non-backslash escape codes. + * + * \n \\n \\\n \\\\n returns: + * \n \ \ \n \ \ + * 11 100 1011 10100 + * + * You are expected to mask out the first bit yourself if the previous block had a trailing + * escape. + * + * & the result with potential_escape to get just the escape characters. + * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. + */ + static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { + // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: + // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be + // inverted (\\\ would be 010 instead of 101). + // + // ``` + // string: | ____\\\\_\\\\_____ | + // maybe_escaped | ODD | \ \ \ \ | + // even-aligned ^^^ ^^^^ odd-aligned + // ``` + // + // Taking that into account, our basic strategy is: + // + // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for + // odd-aligned runs. + // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the + // odd bits in odd-aligned runs. + // 3. & with backslash to clean up any stray bits. + // runs are set to 0, and then XORing with "odd": + // + // | | Mask (shows characters instead of 1's) | Instructions | + // |--------------------------------|----------------------------------------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | + // | | ` even odd even odd odd` | + // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) + // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) + // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) + // -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { - const uint8_t *p = src + 1; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + // Escaped characters are characters following an escape. + uint64_t maybe_escaped = potential_escape << 1; - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } + // To distinguish odd from even escape sequences, therefore, we turn on any *starting* + // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) + // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. + // - Odd runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. + // - All other odd bytes are 1, and even bytes are 0. + uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; + uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - // Note: we use src[1] and not src[0] because src[0] is the quote character in this - // instance. - if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + // Now we flip all odd bytes back with xor. This: + // - Makes odd runs of backslashes go from 0000 to 1010 + // - Makes even runs of backslashes go from 1111 to 1010 + // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) + // - Resets all other bytes to 0 + return even_series_codes_and_odd_bits ^ ODD_BITS; } +}; - return i; -} +} // namespace stage1 +} // unnamed namespace +} // namespace lsx +} // namespace simdjson -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_escape_scanner.h for lsx */ +/* including generic/stage1/json_string_scanner.h for lsx: #include */ +/* begin file generic/stage1/json_string_scanner.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} +namespace simdjson { +namespace lsx { +namespace { +namespace stage1 { -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { - // - // Check for minus sign - // - if(src == src_end) { return NUMBER_ERROR; } - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : + _escaped(escaped), _quote(quote), _in_string(in_string) {} - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-escaped ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += uint8_t(negative) + 1; +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = src; - uint64_t i = 0; - while (parse_digit(*src, i)) { src++; } +private: + // Scans for escape characters + json_escape_scanner escape_scanner{}; + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; +}; - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(src - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*src)) { - // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(*src != '"') { return NUMBER_ERROR; } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. +// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = escape_scanner.next(backslash).escaped; + const uint64_t quote = in.eq('"') & ~escaped; -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { - // - // Check for minus sign // - bool negative = (*src == '-'); - src += uint8_t(negative); - + // prefix_xor flips on bits inside the string (and flips off the end quote). // - // Parse the integer part. + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; // - // Parse the decimal part. + // Check if we're still in a string at the end of the box so the next block will know // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); + prev_in_string = uint64_t(static_cast(in_string) >> 63); - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = p-start_digits > 19; - } - } else { - overflow = p-src > 19; + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block(escaped, quote, in_string); +} + +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; } + return SUCCESS; +} - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; +} // namespace stage1 +} // unnamed namespace +} // namespace lsx +} // namespace simdjson - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_string_scanner.h for lsx */ +/* including generic/stage1/utf8_lookup4_algorithm.h for lsx: #include */ +/* begin file generic/stage1/utf8_lookup4_algorithm.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H - exponent += exp_neg ? 0-exp : exp; - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } +namespace simdjson { +namespace lsx { +namespace { +namespace utf8_validation { - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; +using namespace simd; - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), &d)) { - return NUMBER_ERROR; - } - return d; -} + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ -simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { - return (*src == '-'); -} + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, -simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += uint8_t(negative); - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } - return false; -} + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, -simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += uint8_t(negative); - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { - // We have an integer. - // If the number is negative and valid, it must be a signed integer. - if(negative) { return number_type::signed_integer; } - // We want values larger or equal to 9223372036854775808 to be unsigned - // integers, and the other values to be signed integers. - int digit_count = int(p - src); - if(digit_count >= 19) { - const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); - if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { - return number_type::unsigned_integer; - } - } - return number_type::signed_integer; - } - // Hopefully, we have 'e' or 'E' or '.'. - return number_type::floating_point_number; -} + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { - if(src == src_end) { return NUMBER_ERROR; } - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += uint8_t(negative); + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - if(p == src_end) { return NUMBER_ERROR; } - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while ((p != src_end) && parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = must_be_2_3_continuation(prev2, prev3); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } // - // Parse the decimal part. + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely((p != src_end) && (*p == '.'))) { - p++; - const uint8_t *start_decimal_digits = p; - if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. + // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); } - } else { - overflow = p-src > 19; - } - // - // Parse the exponent - // - if ((p != src_end) && (*p == 'e' || *p == 'E')) { - p++; - if(p == src_end) { return NUMBER_ERROR; } - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. + simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while ((p != src_end) && parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; + } - exponent += exp_neg ? 0-exp : exp; - } + }; // struct utf8_checker +} // namespace utf8_validation - if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } +} // unnamed namespace +} // namespace lsx +} // namespace simdjson - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +/* end file generic/stage1/utf8_lookup4_algorithm.h for lsx */ +/* including generic/stage1/json_scanner.h for lsx: #include */ +/* begin file generic/stage1/json_scanner.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { - return NUMBER_ERROR; - } - return d; -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += uint8_t(negative) + 1; +namespace simdjson { +namespace lsx { +namespace { +namespace stage1 { - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. + */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = p-start_digits > 19; - } - } else { - overflow = p-src > 19; - } + // Helpers - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) - exponent += exp_neg ? 0-exp : exp; + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. + */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; } +}; - if (*p != '"') { return NUMBER_ERROR; } +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). // - // Assemble (or slow-parse) the float + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), &d)) { - return NUMBER_ERROR; - } - return d; + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. + characters, + follows_nonquote_scalar + ); } -} // unnamed namespace -#endif // SIMDJSON_SKIPNUMBERPARSING - -} // namespace numberparsing - -inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { - switch (type) { - case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; - case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; - case number_type::floating_point_number: out << "floating-point number (binary64)"; break; - default: SIMDJSON_UNREACHABLE(); - } - return out; +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); } -} // namespace ppc64 +} // namespace stage1 +} // unnamed namespace +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_GENERIC_NUMBERPARSING_H -/* end file simdjson/generic/numberparsing.h for ppc64 */ +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +/* end file generic/stage1/json_scanner.h for lsx */ -/* including simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +// All other declarations +/* including generic/stage1/find_next_document_index.h for lsx: #include */ +/* begin file generic/stage1/find_next_document_index.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { - -// -// internal::implementation_simdjson_result_base inline implementation -// +namespace lsx { +namespace { +namespace stage1 { -template -simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { - error = this->second; - if (!error) { - value = std::forward>(*this).first; +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; } + return 0; } -template -simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { - error_code error; - std::forward>(*this).tie(value, error); - return error; -} +} // namespace stage1 +} // unnamed namespace +} // namespace lsx +} // namespace simdjson -template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { - return this->second; -} +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for lsx */ +/* including generic/stage1/json_minifier.h for lsx: #include */ +/* begin file generic/stage1/json_minifier.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H -#if SIMDJSON_EXCEPTIONS +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -template -simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return this->first; -} +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) -template -simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { - return std::forward>(*this).take_value(); -} +namespace simdjson { +namespace lsx { +namespace { +namespace stage1 { -template -simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(this->first); -} +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; -template -simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { - return std::forward>(*this).take_value(); -} +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; +}; -#endif // SIMDJSON_EXCEPTIONS +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} -template -simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { - return this->first; +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; } -template -simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { - return this->first; +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); } -template -simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { - return std::forward(this->first); +template<> +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); } -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept - : first{std::forward(value)}, second{error} {} -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept - : implementation_simdjson_result_base(T{}, error) {} -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept - : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); -} // namespace ppc64 -} // namespace simdjson + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } -#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H -/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64 */ -/* end file simdjson/generic/amalgamated.h for ppc64 */ -/* including simdjson/ppc64/end.h: #include "simdjson/ppc64/end.h" */ -/* begin file simdjson/ppc64/end.h */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} -#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT -/* undefining SIMDJSON_IMPLEMENTATION from "ppc64" */ -#undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/ppc64/end.h */ +} // namespace stage1 +} // unnamed namespace +} // namespace lsx +} // namespace simdjson -#endif // SIMDJSON_PPC64_H -/* end file simdjson/ppc64.h */ -/* including simdjson/ppc64/implementation.h: #include */ -/* begin file simdjson/ppc64/implementation.h */ -#ifndef SIMDJSON_PPC64_IMPLEMENTATION_H -#define SIMDJSON_PPC64_IMPLEMENTATION_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +/* end file generic/stage1/json_minifier.h for lsx */ +/* including generic/stage1/json_structural_indexer.h for lsx: #include */ +/* begin file generic/stage1/json_structural_indexer.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -namespace simdjson { +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) -/** - * Implementation for ALTIVEC (PPC64). - */ -namespace ppc64 { +namespace simdjson { +namespace lsx { +namespace { +namespace stage1 { -/** - * @private - */ -class implementation final : public simdjson::implementation { +class bit_indexer { public: - simdjson_inline implementation() - : simdjson::implementation("ppc64", "PPC64 ALTIVEC", - internal::instruction_set::ALTIVEC) {} - - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t capacity, size_t max_length, - std::unique_ptr &dst) - const noexcept final; - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, - uint8_t *dst, - size_t &dst_len) const noexcept final; - simdjson_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; -}; + uint32_t *tail; -} // namespace ppc64 -} // namespace simdjson + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} -#endif // SIMDJSON_PPC64_IMPLEMENTATION_H -/* end file simdjson/ppc64/implementation.h */ +#if SIMDJSON_PREFER_REVERSE_BITS + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. + * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + simdjson_inline void write_index(uint32_t idx, uint64_t& rev_bits, int i) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } +#else + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ -/* including simdjson/ppc64/begin.h: #include */ -/* begin file simdjson/ppc64/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "ppc64" */ -#define SIMDJSON_IMPLEMENTATION ppc64 -/* including simdjson/ppc64/base.h: #include "simdjson/ppc64/base.h" */ -/* begin file simdjson/ppc64/base.h */ -#ifndef SIMDJSON_PPC64_BASE_H -#define SIMDJSON_PPC64_BASE_H + simdjson_inline void write_index(uint32_t idx, uint64_t& bits, int i) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } +#endif // SIMDJSON_PREFER_REVERSE_BITS -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + template + simdjson_inline int write_indexes(uint32_t idx, uint64_t& bits) { + write_index(idx, bits, START); + SIMDJSON_IF_CONSTEXPR (N > 1) { + write_indexes<(N-1>0?START+1:START), (N-1>=0?N-1:1)>(idx, bits); + } + return START+N; + } -namespace simdjson { -/** - * Implementation for ALTIVEC (PPC64). - */ -namespace ppc64 { + template + simdjson_inline int write_indexes_stepped(uint32_t idx, uint64_t& bits, int cnt) { + write_indexes(idx, bits); + SIMDJSON_IF_CONSTEXPR ((START+STEP) < END) { + if (simdjson_unlikely((START+STEP) < cnt)) { + write_indexes_stepped<(START+STEP(idx, bits, cnt); + } + } + return ((END-START) % STEP) == 0 ? END : (END-START) - ((END-START) % STEP) + STEP; + } -class implementation; + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it + // will provide its own version of the code. +#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; -namespace { -namespace simd { -template struct simd8; -template struct simd8x64; -} // namespace simd -} // unnamed namespace + int cnt = static_cast(count_ones(bits)); -} // namespace ppc64 -} // namespace simdjson +#if SIMDJSON_PREFER_REVERSE_BITS + bits = reverse_bits(bits); +#endif +#ifdef SIMDJSON_STRUCTURAL_INDEXER_STEP + static constexpr const int STEP = SIMDJSON_STRUCTURAL_INDEXER_STEP; +#else + static constexpr const int STEP = 4; +#endif + static constexpr const int STEP_UNTIL = 24; -#endif // SIMDJSON_PPC64_BASE_H -/* end file simdjson/ppc64/base.h */ -/* including simdjson/ppc64/intrinsics.h: #include "simdjson/ppc64/intrinsics.h" */ -/* begin file simdjson/ppc64/intrinsics.h */ -#ifndef SIMDJSON_PPC64_INTRINSICS_H -#define SIMDJSON_PPC64_INTRINSICS_H + write_indexes_stepped<0, STEP_UNTIL, STEP>(idx, bits, cnt); + SIMDJSON_IF_CONSTEXPR (STEP_UNTIL < 64) { + if (simdjson_unlikely(STEP_UNTIL < cnt)) { + for (int i=STEP_UNTIL; itail += cnt; + } +#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER -// This should be the correct header whether -// you use visual studio or other compilers. -#include +}; -// These are defined by altivec.h in GCC toolchain, it is safe to undef them. -#ifdef bool -#undef bool -#endif +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. + */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; -#ifdef vector -#undef vector -#endif +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); -static_assert(sizeof(__vector unsigned char) <= simdjson::SIMDJSON_PADDING, "insufficient padding for ppc64"); + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; -#endif // SIMDJSON_PPC64_INTRINSICS_H -/* end file simdjson/ppc64/intrinsics.h */ -/* including simdjson/ppc64/bitmanipulation.h: #include "simdjson/ppc64/bitmanipulation.h" */ -/* begin file simdjson/ppc64/bitmanipulation.h */ -#ifndef SIMDJSON_PPC64_BITMANIPULATION_H -#define SIMDJSON_PPC64_BITMANIPULATION_H +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} -namespace simdjson { -namespace ppc64 { -namespace { +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 does not use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -SIMDJSON_NO_SANITIZE_UNDEFINED -// This function can be used safely even if not all bytes have been -// initialized. -// See issue https://github.com/simdjson/simdjson/issues/1965 -SIMDJSON_NO_SANITIZE_MEMORY -simdjson_inline int trailing_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) + uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); } -/* result might be undefined when input_num is zero */ -simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return input_num & (input_num - 1); +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); } -/* result might be undefined when input_num is zero */ -simdjson_inline int leading_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); } -#if SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_inline int count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows in this kernel - return __popcnt64(input_num); // Visual Studio wants two underscores -} -#else -simdjson_inline int count_ones(uint64_t input_num) { - return __builtin_popcountll(input_num); -} +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION + checker.check_next_input(in); #endif + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} -simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - *result = value1 + value2; - return *result < value1; -#else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); -#endif +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? + ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On-Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. + * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); } +} // namespace stage1 } // unnamed namespace -} // namespace ppc64 +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_PPC64_BITMANIPULATION_H -/* end file simdjson/ppc64/bitmanipulation.h */ -/* including simdjson/ppc64/bitmask.h: #include "simdjson/ppc64/bitmask.h" */ -/* begin file simdjson/ppc64/bitmask.h */ -#ifndef SIMDJSON_PPC64_BITMASK_H -#define SIMDJSON_PPC64_BITMASK_H +// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. +#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +/* end file generic/stage1/json_structural_indexer.h for lsx */ +/* including generic/stage1/utf8_validator.h for lsx: #include */ +/* begin file generic/stage1/utf8_validator.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace { +namespace stage1 { -// -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is -// encountered. -// -// For example, prefix_xor(00100100) == 00011100 -// -simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { - // You can use the version below, however gcc sometimes miscompiles - // vec_pmsum_be, it happens somewhere around between 8 and 9th version. - // The performance boost was not noticeable, falling back to a usual - // implementation. - // __vector unsigned long long all_ones = {~0ull, ~0ull}; - // __vector unsigned long long mask = {bitmask, 0}; - // // Clang and GCC return different values for pmsum for ull so cast it to one. - // // Generally it is not specified by ALTIVEC ISA what is returned by - // // vec_pmsum_be. - // #if defined(__LITTLE_ENDIAN__) - // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); - // #else - // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); - // #endif - bitmask ^= bitmask << 1; - bitmask ^= bitmask << 2; - bitmask ^= bitmask << 4; - bitmask ^= bitmask << 8; - bitmask ^= bitmask << 16; - bitmask ^= bitmask << 32; - return bitmask; +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); } +} // namespace stage1 } // unnamed namespace -} // namespace ppc64 +} // namespace lsx } // namespace simdjson -#endif -/* end file simdjson/ppc64/bitmask.h */ -/* including simdjson/ppc64/numberparsing_defs.h: #include "simdjson/ppc64/numberparsing_defs.h" */ -/* begin file simdjson/ppc64/numberparsing_defs.h */ -#ifndef SIMDJSON_PPC64_NUMBERPARSING_DEFS_H -#define SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +/* end file generic/stage1/utf8_validator.h for lsx */ +/* end file generic/stage1/amalgamated.h for lsx */ +/* including generic/stage2/amalgamated.h for lsx: #include */ +/* begin file generic/stage2/amalgamated.h for lsx */ +// Stuff other things depend on +/* including generic/stage2/base.h for lsx: #include */ +/* begin file generic/stage2/base.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/intrinsics.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include - -#if defined(__linux__) -#include -#elif defined(__FreeBSD__) -#include -#endif - namespace simdjson { -namespace ppc64 { -namespace numberparsing { - -// we don't have appropriate instructions, so let us use a scalar function -// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ -/** @private */ -static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - uint64_t val; - std::memcpy(&val, chars, sizeof(uint64_t)); -#ifdef __BIG_ENDIAN__ -#if defined(__linux__) - val = bswap_64(val); -#elif defined(__FreeBSD__) - val = bswap64(val); -#endif -#endif - val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; - val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; - return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); -} +namespace lsx { +namespace { +namespace stage2 { -/** @private */ -simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { - internal::value128 answer; -#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); -#endif - return answer; -} +class json_iterator; +class structural_iterator; +struct tape_builder; +struct tape_writer; -} // namespace numberparsing -} // namespace ppc64 +} // namespace stage2 +} // unnamed namespace +} // namespace lsx } // namespace simdjson -#define SIMDJSON_SWAR_NUMBER_PARSING 1 - -#endif // SIMDJSON_PPC64_NUMBERPARSING_DEFS_H -/* end file simdjson/ppc64/numberparsing_defs.h */ -/* including simdjson/ppc64/simd.h: #include "simdjson/ppc64/simd.h" */ -/* begin file simdjson/ppc64/simd.h */ -#ifndef SIMDJSON_PPC64_SIMD_H -#define SIMDJSON_PPC64_SIMD_H +#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +/* end file generic/stage2/base.h for lsx */ +/* including generic/stage2/tape_writer.h for lsx: #include */ +/* begin file generic/stage2/tape_writer.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include +#include namespace simdjson { -namespace ppc64 { +namespace lsx { namespace { -namespace simd { - -using __m128i = __vector unsigned char; - -template struct base { - __m128i value; - - // Zero constructor - simdjson_inline base() : value{__m128i()} {} - - // Conversion from SIMD register - simdjson_inline base(const __m128i _value) : value(_value) {} - - // Conversion to SIMD register - simdjson_inline operator const __m128i &() const { - return this->value; - } - simdjson_inline operator __m128i &() { return this->value; } - - // Bit operations - simdjson_inline Child operator|(const Child other) const { - return vec_or(this->value, (__m128i)other); - } - simdjson_inline Child operator&(const Child other) const { - return vec_and(this->value, (__m128i)other); - } - simdjson_inline Child operator^(const Child other) const { - return vec_xor(this->value, (__m128i)other); - } - simdjson_inline Child bit_andnot(const Child other) const { - return vec_andc(this->value, (__m128i)other); - } - simdjson_inline Child &operator|=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast | other; - return *this_cast; - } - simdjson_inline Child &operator&=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast & other; - return *this_cast; - } - simdjson_inline Child &operator^=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast ^ other; - return *this_cast; - } -}; - -template > -struct base8 : base> { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - - simdjson_inline base8() : base>() {} - simdjson_inline base8(const __m128i _value) : base>(_value) {} - - friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { - return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); - } - - static const int SIZE = sizeof(base>::value); - - template - simdjson_inline simd8 prev(simd8 prev_chunk) const { - __m128i chunk = this->value; -#ifdef __LITTLE_ENDIAN__ - chunk = (__m128i)vec_reve(this->value); - prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); -#endif - chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); -#ifdef __LITTLE_ENDIAN__ - chunk = (__m128i)vec_reve((__m128i)chunk); -#endif - return chunk; - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd8 : base8 { - static simdjson_inline simd8 splat(bool _value) { - return (__m128i)vec_splats((unsigned char)(-(!!_value))); - } - - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m128i _value) - : base8(_value) {} - // Splat constructor - simdjson_inline simd8(bool _value) - : base8(splat(_value)) {} +namespace stage2 { - simdjson_inline int to_bitmask() const { - __vector unsigned long long result; - const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, - 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; - result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, - (__m128i)perm_mask)); -#ifdef __LITTLE_ENDIAN__ - return static_cast(result[1]); -#else - return static_cast(result[0]); -#endif - } - simdjson_inline bool any() const { - return !vec_all_eq(this->value, (__m128i)vec_splats(0)); - } - simdjson_inline simd8 operator~() const { - return this->value ^ (__m128i)splat(true); - } -}; + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; -template struct base8_numeric : base8 { - static simdjson_inline simd8 splat(T value) { - (void)value; - return (__m128i)vec_splats(value); - } - static simdjson_inline simd8 zero() { return splat(0); } - static simdjson_inline simd8 load(const T values[16]) { - return (__m128i)(vec_vsx_ld(0, reinterpret_cast(values))); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, - T v5, T v6, T v7, T v8, T v9, - T v10, T v11, T v12, T v13, - T v14, T v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, - v14, v15); - } + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; - simdjson_inline base8_numeric() : base8() {} - simdjson_inline base8_numeric(const __m128i _value) - : base8(_value) {} + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; - // Store to array - simdjson_inline void store(T dst[16]) const { - vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); - } + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; - // Override to distinguish from bool version - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; - // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { - return (__m128i)((__m128i)this->value + (__m128i)other); - } - simdjson_inline simd8 operator-(const simd8 other) const { - return (__m128i)((__m128i)this->value - (__m128i)other); - } - simdjson_inline simd8 &operator+=(const simd8 other) { - *this = *this + other; - return *static_cast *>(this); - } - simdjson_inline simd8 &operator-=(const simd8 other) { - *this = *this - other; - return *static_cast *>(this); - } + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior - // for out of range values) - template - simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); - } + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted - // as a bitset). Passing a 0 value for mask would be equivalent to writing out - // every byte to output. Only the first 16 - count_ones(mask) bytes of the - // result are significant but 16 bytes get written. Design consideration: it - // seems like a function with the signature simd8 compress(uint32_t mask) - // would be sensible, but the AVX ISA makes this kind of approach difficult. - template - simdjson_inline void compress(uint16_t mask, L *output) const { - using internal::BitsSetTable256mul2; - using internal::pshufb_combine_table; - using internal::thintable_epi8; - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. -#ifdef __LITTLE_ENDIAN__ - __m128i shufmask = (__m128i)(__vector unsigned long long){ - thintable_epi8[mask1], thintable_epi8[mask2]}; -#else - __m128i shufmask = (__m128i)(__vector unsigned long long){ - thintable_epi8[mask2], thintable_epi8[mask1]}; - shufmask = (__m128i)vec_reve((__m128i)shufmask); -#endif - // we increment by 0x08 the second half of the mask - shufmask = ((__m128i)shufmask) + - ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; - // this is the version "nearly pruned" - __m128i pruned = vec_perm(this->value, this->value, shufmask); - // we still need to put the two halves together. - // we compute the popcount of the first half: - int pop1 = BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - __m128i compactmask = - vec_vsx_ld(0, reinterpret_cast(pshufb_combine_table + pop1 * 8)); - __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); - vec_vsx_st(answer, 0, reinterpret_cast<__m128i *>(output)); - } +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer - template - simdjson_inline simd8 - lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, - L replace5, L replace6, L replace7, L replace8, L replace9, - L replace10, L replace11, L replace12, L replace13, L replace14, - L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, replace4, replace5, replace6, - replace7, replace8, replace9, replace10, replace11, replace12, - replace13, replace14, replace15)); - } -}; +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} -// Signed bytes -template <> struct simd8 : base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) - : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const int8_t *values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, - int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) - : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10, v11, v12, v13, v14, - v15}) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 - repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, - int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} - // Order-sensitive comparisons - simdjson_inline simd8 - max_val(const simd8 other) const { - return (__m128i)vec_max((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } - simdjson_inline simd8 - min_val(const simd8 other) const { - return (__m128i)vec_min((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } - simdjson_inline simd8 - operator>(const simd8 other) const { - return (__m128i)vec_cmpgt((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } - simdjson_inline simd8 - operator<(const simd8 other) const { - return (__m128i)vec_cmplt((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } -}; +/** Write a double value to tape. */ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} -// Unsigned bytes -template <> struct simd8 : base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) - : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const uint8_t *values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) - : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15}) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 - repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, - uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, - uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, - uint8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} - // Saturated math - simdjson_inline simd8 - saturating_add(const simd8 other) const { - return (__m128i)vec_adds(this->value, (__m128i)other); - } - simdjson_inline simd8 - saturating_sub(const simd8 other) const { - return (__m128i)vec_subs(this->value, (__m128i)other); - } +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} - // Order-specific operations - simdjson_inline simd8 - max_val(const simd8 other) const { - return (__m128i)vec_max(this->value, (__m128i)other); - } - simdjson_inline simd8 - min_val(const simd8 other) const { - return (__m128i)vec_min(this->value, (__m128i)other); - } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 - gt_bits(const simd8 other) const { - return this->saturating_sub(other); - } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 - lt_bits(const simd8 other) const { - return other.saturating_sub(*this); - } - simdjson_inline simd8 - operator<=(const simd8 other) const { - return other.max_val(*this) == other; - } - simdjson_inline simd8 - operator>=(const simd8 other) const { - return other.min_val(*this) == other; - } - simdjson_inline simd8 - operator>(const simd8 other) const { - return this->gt_bits(other).any_bits_set(); - } - simdjson_inline simd8 - operator<(const simd8 other) const { - return this->gt_bits(other).any_bits_set(); - } +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} - // Bit-specific operations - simdjson_inline simd8 bits_not_set() const { - return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); - } - simdjson_inline simd8 bits_not_set(simd8 bits) const { - return (*this & bits).bits_not_set(); - } - simdjson_inline simd8 any_bits_set() const { - return ~this->bits_not_set(); - } - simdjson_inline simd8 any_bits_set(simd8 bits) const { - return ~this->bits_not_set(bits); - } - simdjson_inline bool bits_not_set_anywhere() const { - return vec_all_eq(this->value, (__m128i)vec_splats(0)); - } - simdjson_inline bool any_bits_set_anywhere() const { - return !bits_not_set_anywhere(); - } - simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { - return vec_all_eq(vec_and(this->value, (__m128i)bits), - (__m128i)vec_splats(0)); - } - simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { - return !bits_not_set_anywhere(bits); - } - template simdjson_inline simd8 shr() const { - return simd8( - (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); - } - template simdjson_inline simd8 shl() const { - return simd8( - (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); - } -}; +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} -template struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, - "PPC64 kernel should use four registers per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} - simd8x64(const simd8x64 &o) = delete; // no copy allowed - simd8x64 & - operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, - const simd8 chunk2, const simd8 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_inline simd8x64(const T ptr[64]) - : chunks{simd8::load(ptr), simd8::load(ptr + 16), - simd8::load(ptr + 32), simd8::load(ptr + 48)} {} +} // namespace stage2 +} // unnamed namespace +} // namespace lsx +} // namespace simdjson - simdjson_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr + sizeof(simd8) * 0); - this->chunks[1].store(ptr + sizeof(simd8) * 1); - this->chunks[2].store(ptr + sizeof(simd8) * 2); - this->chunks[3].store(ptr + sizeof(simd8) * 3); - } +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for lsx */ +/* including generic/stage2/logger.h for lsx: #include */ +/* begin file generic/stage2/logger.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H - simdjson_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - simdjson_inline uint64_t compress(uint64_t mask, T *output) const { - this->chunks[0].compress(uint16_t(mask), output); - this->chunks[1].compress(uint16_t(mask >> 16), - output + 16 - count_ones(mask & 0xFFFF)); - this->chunks[2].compress(uint16_t(mask >> 32), - output + 32 - count_ones(mask & 0xFFFFFFFF)); - this->chunks[3].compress(uint16_t(mask >> 48), - output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); - return 64 - count_ones(mask); - } +#include - simdjson_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r1 = this->chunks[1].to_bitmask(); - uint64_t r2 = this->chunks[2].to_bitmask(); - uint64_t r3 = this->chunks[3].to_bitmask(); - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace lsx { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. + + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } } - simdjson_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, - this->chunks[2] == mask, this->chunks[3] == mask) - .to_bitmask(); + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } } - simdjson_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64(this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1], - this->chunks[2] == other.chunks[2], - this->chunks[3] == other.chunks[3]) - .to_bitmask(); + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } } - simdjson_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, - this->chunks[2] <= mask, this->chunks[3] <= mask) - .to_bitmask(); + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i -} // namespace simd +} // namespace logger } // unnamed namespace -} // namespace ppc64 +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_PPC64_SIMD_INPUT_H -/* end file simdjson/ppc64/simd.h */ -/* including simdjson/ppc64/stringparsing_defs.h: #include "simdjson/ppc64/stringparsing_defs.h" */ -/* begin file simdjson/ppc64/stringparsing_defs.h */ -#ifndef SIMDJSON_PPC64_STRINGPARSING_DEFS_H -#define SIMDJSON_PPC64_STRINGPARSING_DEFS_H +#endif // SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H +/* end file generic/stage2/logger.h for lsx */ + +// All other declarations +/* including generic/stage2/json_iterator.h for lsx: #include */ +/* begin file generic/stage2/json_iterator.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/simd.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace { +namespace stage2 { -using namespace simd; - -// Holds backslashes and quotes locations. -struct backslash_and_quote { +class json_iterator { public: - static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote - copy_and_find(const uint8_t *src, uint8_t *dst); + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + uint32_t depth{0}; - simdjson_inline bool has_quote_first() { - return ((bs_bits - 1) & quote_bits) != 0; + /** + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_end() - at the start `]` of a non-empty object. + * - visit_object_start() - at the end `]` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. + * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. + */ + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } } - simdjson_inline bool has_backslash() { return bs_bits != 0; } - simdjson_inline int quote_index() { - return trailing_zeroes(quote_bits); + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); } - simdjson_inline int backslash_index() { - return trailing_zeroes(bs_bits); + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } } - uint32_t bs_bits; - uint32_t quote_bits; -}; // struct backslash_and_quote +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } -simdjson_inline backslash_and_quote -backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // this can read up to 31 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), - "backslash and quote finder must process fewer than " - "SIMDJSON_PADDING bytes"); - simd8 v0(src); - simd8 v1(src + sizeof(v0)); - v0.store(dst); - v1.store(dst + sizeof(v0)); +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; - // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on - // PPC; therefore, we smash them together into a 64-byte mask and get the - // bitmask from there. - uint64_t bs_and_quote = - simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); - return { - uint32_t(bs_and_quote), // bs_bits - uint32_t(bs_and_quote >> 32) // quote_bits - }; -} +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); -#endif // SIMDJSON_PPC64_STRINGPARSING_DEFS_H -/* end file simdjson/ppc64/stringparsing_defs.h */ + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } -#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 -/* end file simdjson/ppc64/begin.h */ -/* including generic/amalgamated.h for ppc64: #include */ -/* begin file generic/amalgamated.h for ppc64 */ -#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) -#error generic/dependencies.h must be included before generic/amalgamated.h! -#endif + return SUCCESS; -/* including generic/base.h for ppc64: #include */ -/* begin file generic/base.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_BASE_H +} // walk_document() -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} -namespace simdjson { -namespace ppc64 { -namespace { +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} -struct json_character_block; +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} -#endif // SIMDJSON_SRC_GENERIC_BASE_H -/* end file generic/base.h for ppc64 */ -/* including generic/dom_parser_implementation.h for ppc64: #include */ -/* begin file generic/dom_parser_implementation.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} -// Interface a dom parser implementation must fulfill -namespace simdjson { -namespace ppc64 { -namespace { +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} -simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); -simdjson_inline bool is_ascii(const simd8x64& input); +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + // Use the fact that most scalars are going to be either strings or numbers. + if(*value == '"') { + return visitor.visit_string(*this, value); + } else if (((*value - '0') < 10) || (*value == '-')) { + return visitor.visit_number(*this, value); + } + // true, false, null are uncommon. + switch (*value) { + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} +} // namespace stage2 } // unnamed namespace -} // namespace ppc64 +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H -/* end file generic/dom_parser_implementation.h for ppc64 */ -/* including generic/json_character_block.h for ppc64: #include */ -/* begin file generic/json_character_block.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for lsx */ +/* including generic/stage2/stringparsing.h for lsx: #include */ +/* begin file generic/stage2/stringparsing.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + namespace simdjson { -namespace ppc64 { +namespace lsx { namespace { +/// @private +namespace stringparsing { -struct json_character_block { - static simdjson_inline json_character_block classify(const simd::simd8x64& in); +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } - simdjson_inline uint64_t op() const noexcept { return _op; } - simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. - uint64_t _whitespace; - uint64_t _op; + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; -#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H -/* end file generic/json_character_block.h for ppc64 */ -/* end file generic/amalgamated.h for ppc64 */ -/* including generic/stage1/amalgamated.h for ppc64: #include */ -/* begin file generic/stage1/amalgamated.h for ppc64 */ -// Stuff other things depend on -/* including generic/stage1/base.h for ppc64: #include */ -/* begin file generic/stage1/base.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } -namespace simdjson { -namespace ppc64 { -namespace { -namespace stage1 { + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} -class bit_indexer; -template -struct buf_block_reader; -struct json_block; -class json_minifier; -class json_scanner; -struct json_string_block; -class json_string_scanner; -class json_structural_indexer; -} // namespace stage1 +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } -namespace utf8_validation { -struct utf8_checker; -} // namespace utf8_validation + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} -using utf8_validation::utf8_checker; +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_ptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} + +} // namespace stringparsing } // unnamed namespace -} // namespace ppc64 +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H -/* end file generic/stage1/base.h for ppc64 */ -/* including generic/stage1/buf_block_reader.h for ppc64: #include */ -/* begin file generic/stage1/buf_block_reader.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for lsx */ +/* including generic/stage2/structural_iterator.h for lsx: #include */ +/* begin file generic/stage2/structural_iterator.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include - namespace simdjson { -namespace ppc64 { +namespace lsx { namespace { -namespace stage1 { +namespace stage2 { -// Walks through a buffer in block-sized increments, loading the last part with spaces -template -struct buf_block_reader { +class structural_iterator { public: - simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdjson_inline size_t block_index(); - simdjson_inline bool has_full_block() const; - simdjson_inline const uint8_t *full_block() const; - /** - * Get the last block, padded with spaces. - * - * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this - * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there - * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. - * - * @return the number of effective characters in the last block. - */ - simdjson_inline size_t get_remainder(uint8_t *dst) const; - simdjson_inline void advance(); -private: - const uint8_t *buf; - const size_t len; - const size_t lenminusstep; - size_t idx; -}; + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text_64(const uint8_t *text) { - static char buf[sizeof(simd8x64) + 1]; - for (size_t i=0; i); i++) { - buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + // Start a structural + simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text(const simd8x64& in) { - static char buf[sizeof(simd8x64) + 1]; - in.store(reinterpret_cast(buf)); - for (size_t i=0; i); i++) { - if (buf[i] < ' ') { buf[i] = '_'; } + // Get the buffer position of the current structural character + simdjson_inline const uint8_t* current() { + return &buf[*(next_structural-1)]; } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { - static char buf[sizeof(simd8x64) + 1]; - in.store(reinterpret_cast(buf)); - for (size_t i=0; i); i++) { - if (buf[i] <= ' ') { buf[i] = '_'; } - if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } + // Get the current structural character + simdjson_inline char current_char() { + return buf[*(next_structural-1)]; } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -simdjson_unused static char * format_mask(uint64_t mask) { - static char buf[sizeof(simd8x64) + 1]; - for (size_t i=0; i<64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + // Get the next structural character without advancing + simdjson_inline char peek_next_char() { + return buf[*next_structural]; + } + simdjson_inline const uint8_t* peek() { + return &buf[*next_structural]; + } + simdjson_inline const uint8_t* advance() { + return &buf[*(next_structural++)]; + } + simdjson_inline char advance_char() { + return buf[*(next_structural++)]; + } + simdjson_inline size_t remaining_len() { + return dom_parser.len - *(next_structural-1); } - buf[64] = '\0'; - return buf; -} - -template -simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} - -template -simdjson_inline size_t buf_block_reader::block_index() { return idx; } - -template -simdjson_inline bool buf_block_reader::has_full_block() const { - return idx < lenminusstep; -} - -template -simdjson_inline const uint8_t *buf_block_reader::full_block() const { - return &buf[idx]; -} - -template -simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { - if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. - std::memcpy(dst, buf + idx, len - idx); - return len - idx; -} -template -simdjson_inline void buf_block_reader::advance() { - idx += STEP_SIZE; -} + simdjson_inline bool at_end() { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; + } + simdjson_inline bool at_beginning() { + return next_structural == dom_parser.structural_indexes.get(); + } +}; -} // namespace stage1 +} // namespace stage2 } // unnamed namespace -} // namespace ppc64 +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H -/* end file generic/stage1/buf_block_reader.h for ppc64 */ -/* including generic/stage1/json_escape_scanner.h for ppc64: #include */ -/* begin file generic/stage1/json_escape_scanner.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +/* end file generic/stage2/structural_iterator.h for lsx */ +/* including generic/stage2/tape_builder.h for lsx: #include */ +/* begin file generic/stage2/tape_builder.h for lsx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + namespace simdjson { -namespace ppc64 { +namespace lsx { namespace { -namespace stage1 { +namespace stage2 { -/** - * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). - */ -struct json_escape_scanner { - /** The actual escape characters (the backslashes themselves). */ - uint64_t next_is_escaped = 0ULL; +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; - struct escaped_and_escape { - /** - * Mask of escaped characters. - * - * ``` - * \n \\n \\\n \\\\n \ - * 0100100010100101000 - * n \ \ n \ \ - * ``` - */ - uint64_t escaped; - /** - * Mask of escape characters. - * - * ``` - * \n \\n \\\n \\\\n \ - * 1001000101001010001 - * \ \ \ \ \ \ \ - * ``` - */ - uint64_t escape; - }; + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; /** - * Get a mask of both escape and escaped characters (the characters following a backslash). + * Called when a key in a field is encountered. * - * @param potential_escape A mask of the character that can escape others (but could be - * escaped itself). e.g. block.eq('\\') + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. */ - simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { - -#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT - if (!backslash) { return {next_escaped_without_backslashes(), 0}; } -#endif - - // | | Mask (shows characters instead of 1's) | Depth | Instructions | - // |--------------------------------|----------------------------------------|-------|---------------------| - // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | - // | | ` even odd even odd odd` | | | - // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) - // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) - // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) - // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) - // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () - // (*) this is not needed until the next iteration - uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); - uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); - uint64_t escape = escape_and_terminal_code & backslash; - this->next_is_escaped = escape >> 63; - return {escaped, escape}; - } - -private: - static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; - - simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { - uint64_t escaped = this->next_is_escaped; - this->next_is_escaped = 0; - return escaped; - } + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; /** - * Returns a mask of the next escape characters (masking out escaped backslashes), along with - * any non-backslash escape codes. - * - * \n \\n \\\n \\\\n returns: - * \n \ \ \n \ \ - * 11 100 1011 10100 - * - * You are expected to mask out the first bit yourself if the previous block had a trailing - * escape. + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. * - * & the result with potential_escape to get just the escape characters. - * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. */ - static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { - // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: - // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be - // inverted (\\\ would be 010 instead of 101). - // - // ``` - // string: | ____\\\\_\\\\_____ | - // maybe_escaped | ODD | \ \ \ \ | - // even-aligned ^^^ ^^^^ odd-aligned - // ``` - // - // Taking that into account, our basic strategy is: - // - // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for - // odd-aligned runs. - // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the - // odd bits in odd-aligned runs. - // 3. & with backslash to clean up any stray bits. - // runs are set to 0, and then XORing with "odd": - // - // | | Mask (shows characters instead of 1's) | Instructions | - // |--------------------------------|----------------------------------------|---------------------| - // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | - // | | ` even odd even odd odd` | - // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) - // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) - // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) - // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) - // - - // Escaped characters are characters following an escape. - uint64_t maybe_escaped = potential_escape << 1; - - // To distinguish odd from even escape sequences, therefore, we turn on any *starting* - // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) - // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. - // - Odd runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. - // - All other odd bytes are 1, and even bytes are 0. - uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; - uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; - - // Now we flip all odd bytes back with xor. This: - // - Makes odd runs of backslashes go from 0000 to 1010 - // - Makes even runs of backslashes go from 1111 to 1010 - // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) - // - Resets all other bytes to 0 - return even_series_codes_and_odd_bits ^ ODD_BITS; - } -}; - -} // namespace stage1 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson - -#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H -/* end file generic/stage1/json_escape_scanner.h for ppc64 */ -/* including generic/stage1/json_string_scanner.h for ppc64: #include */ -/* begin file generic/stage1/json_string_scanner.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace ppc64 { -namespace { -namespace stage1 { - -struct json_string_block { - // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : - _escaped(escaped), _quote(quote), _in_string(in_string) {} + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; - // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } - // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } - // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } - // Tail of string (everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - // escaped characters (backslashed--does not include the hex characters after \u) - uint64_t _escaped; - // real quotes (non-escaped ones) - uint64_t _quote; - // string characters (includes start quote but not end quote) - uint64_t _in_string; -}; + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; -// Scans blocks for string characters, storing the state necessary to do so -class json_string_scanner { -public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); - // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + /** Called each time a new field or element in an array or object is found. */ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + /** Next location to write to tape */ + tape_writer tape; private: - // Scans for escape characters - json_escape_scanner escape_scanner{}; - // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). - uint64_t prev_in_string = 0ULL; -}; + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; -// -// Return a mask of all string characters plus end quotes. -// -// prev_escaped is overflow saying whether the next character is escaped. -// prev_in_string is overflow saying whether we're still in a string. -// -// Backslash sequences outside of quotes will be detected in stage 2. -// -simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { - const uint64_t backslash = in.eq('\\'); - const uint64_t escaped = escape_scanner.next(backslash).escaped; - const uint64_t quote = in.eq('"') & ~escaped; + simdjson_inline tape_builder(dom::document &doc) noexcept; - // - // prefix_xor flips on bits inside the string (and flips off the end quote). - // - // Then we xor with prev_in_string: if we were in a string already, its effect is flipped - // (characters inside strings are outside, and characters outside strings are inside). - // - const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder - // - // Check if we're still in a string at the end of the box so the next block will know - // - prev_in_string = uint64_t(static_cast(in_string) >> 63); +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} - // Use ^ to turn the beginning quote off, and the end quote on. +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} - // We are returning a function-local object so either we get a move constructor - // or we get copy elision. - return json_string_block(escaped, quote, in_string); +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; } -simdjson_really_inline error_code json_string_scanner::finish() { - if (prev_in_string) { - return UNCLOSED_STRING; - } +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); return SUCCESS; } +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} -} // namespace stage1 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} -#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H -/* end file generic/stage1/json_string_scanner.h for ppc64 */ -/* including generic/stage1/utf8_lookup4_algorithm.h for ppc64: #include */ -/* begin file generic/stage1/utf8_lookup4_algorithm.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} -namespace simdjson { -namespace ppc64 { -namespace { -namespace utf8_validation { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} -using namespace simd; +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} - simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { -// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) -// Bit 1 = Too Long (ASCII followed by continuation) -// Bit 2 = Overlong 3-byte -// Bit 4 = Surrogate -// Bit 5 = Overlong 2-byte -// Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1<<6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 - ); - constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, - CARRY, +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000 - ); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT - ); - return (byte_1_high & byte_1_low & byte_2_high); - } - simdjson_inline simd8 check_multibyte_lengths(const simd8 input, - const simd8 prev_input, const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = must_be_2_3_continuation(prev2, prev3); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; - } +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} - // - // Return nonzero if there are incomplete multibyte characters at the end of the block: - // e.g. if there is a 4-byte character, but it's 3 bytes from the end. - // - simdjson_inline simd8 is_incomplete(const simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): - // ... 1111____ 111_____ 11______ -#if SIMDJSON_IMPLEMENTATION_ICELAKE - static const uint8_t max_array[64] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 - }; -#else - static const uint8_t max_array[32] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 - }; -#endif - const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); - return input.gt_bits(max_value); - } +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} - struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast path) - simd8 prev_incomplete; +// private: - // - // Check whether the current bytes are valid UTF-8. - // - simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes - // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} - // The only problem that can happen at EOF is that a multibyte character is too short - // or a byte value too large in the last bytes: check_special_cases only checks for bytes - // too large in the first of two bytes. - simdjson_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't - // possibly finish them. - this->error |= this->prev_incomplete; - } +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} - simdjson_inline void check_next_input(const simd8x64& input) { - if(simdjson_likely(is_ascii(input))) { - this->error |= this->prev_incomplete; - } else { - // you might think that a for-loop would work, but under Visual Studio, it is not good enough. - static_assert((simd8x64::NUM_CHUNKS == 1) - ||(simd8x64::NUM_CHUNKS == 2) - || (simd8x64::NUM_CHUNKS == 4), - "We support one, two or four chunks per 64-byte block."); - SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; - } - } - // do not forget to call check_eof! - simdjson_inline error_code errors() { - return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; - } +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. +} - }; // struct utf8_checker -} // namespace utf8_validation +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} +} // namespace stage2 } // unnamed namespace -} // namespace ppc64 +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H -/* end file generic/stage1/utf8_lookup4_algorithm.h for ppc64 */ -/* including generic/stage1/json_scanner.h for ppc64: #include */ -/* begin file generic/stage1/json_scanner.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +/* end file generic/stage2/tape_builder.h for lsx */ +/* end file generic/stage2/amalgamated.h for lsx */ +// +// Stage 1 +// namespace simdjson { -namespace ppc64 { -namespace { -namespace stage1 { - -/** - * A block of scanned json, with information on operators and scalars. - * - * We seek to identify pseudo-structural characters. Anything that is inside - * a string must be omitted (hence & ~_string.string_tail()). - * Otherwise, pseudo-structural characters come in two forms. - * 1. We have the structural characters ([,],{,},:, comma). The - * term 'structural character' is from the JSON RFC. - * 2. We have the 'scalar pseudo-structural characters'. - * Scalars are quotes, and any character except structural characters and white space. - * - * To identify the scalar pseudo-structural characters, we must look at what comes - * before them: it must be a space, a quote or a structural characters. - * Starting with simdjson v0.3, we identify them by - * negation: we identify everything that is followed by a non-quote scalar, - * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. - */ -struct json_block { -public: - // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : - _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : - _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} +namespace lsx { - /** - * The start of structurals. - * In simdjson prior to v0.3, these were called the pseudo-structural characters. - **/ - simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } - /** All JSON whitespace (i.e. not in a string) */ - simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} - // Helpers +namespace { - /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } - /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } +using namespace simd; - // string and escape characters - json_string_block _string; - // whitespace, structural characters ('operators'), scalars - json_character_block _characters; - // whether the previous character was a scalar - uint64_t _follows_potential_nonquote_scalar; -private: - // Potential structurals (i.e. disregarding strings) +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // Inspired by haswell. + // LSX use low 5 bits as index. For the 6 operators (:,[]{}), the unique-5bits is [6:2]. + // The ASCII white-space and operators have these values: (char, hex, unique-5bits) + // (' ', 20, 00000) ('\t', 09, 01001) ('\n', 0A, 01010) ('\r', 0D, 01101) + // (',', 2C, 01011) (':', 3A, 01110) ('[', 5B, 10110) ('{', 7B, 11110) (']', 5D, 10111) ('}', 7D, 11111) + const simd8 ws_table = simd8::repeat_16( + ' ', 0, 0, 0, 0, 0, 0, 0, 0, '\t', '\n', 0, 0, '\r', 0, 0 + ); + const simd8 op_table_lo = simd8::repeat_16( + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ',', 0, 0, ':', 0 + ); + const simd8 op_table_hi = simd8::repeat_16( + 0, 0, 0, 0, 0, 0, '[', ']', 0, 0, 0, 0, 0, 0, '{', '}' + ); + uint64_t ws = in.eq({ + in.chunks[0].lookup_16(ws_table), + in.chunks[1].lookup_16(ws_table), + in.chunks[2].lookup_16(ws_table), + in.chunks[3].lookup_16(ws_table) + }); + uint64_t op = in.eq({ + __lsx_vshuf_b(op_table_hi, op_table_lo, in.chunks[0].shr<2>()), + __lsx_vshuf_b(op_table_hi, op_table_lo, in.chunks[1].shr<2>()), + __lsx_vshuf_b(op_table_hi, op_table_lo, in.chunks[2].shr<2>()), + __lsx_vshuf_b(op_table_hi, op_table_lo, in.chunks[3].shr<2>()) + }); - /** - * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". - * They may reside inside a string. - **/ - simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } - /** - * The start of non-operator runs, like 123, true and "abc". - * It main reside inside a string. - **/ - simdjson_inline uint64_t potential_scalar_start() const noexcept { - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space - // then we know that it is irrelevant structurally. - return _characters.scalar() & ~follows_potential_scalar(); - } - /** - * Whether the given character is immediately after a non-operator like 123, true. - * The characters following a quote are not included. - */ - simdjson_inline uint64_t follows_potential_scalar() const noexcept { - // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character - // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a - // white space. - // It is understood that within quoted region, anything at all could be marked (irrelevant). - return _follows_potential_nonquote_scalar; - } -}; + return { ws, op }; +} -/** - * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. - * - * The scanner starts by calculating two distinct things: - * - string characters (taking \" into account) - * - structural characters or 'operators' ([]{},:, comma) - * and scalars (runs of non-operators like 123, true and "abc") - * - * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: - * in particular, the operator/scalar bit will find plenty of things that are actually part of - * strings. When we're done, json_block will fuse the two together by masking out tokens that are - * part of a string. - */ -class json_scanner { -public: - json_scanner() = default; - simdjson_inline json_block next(const simd::simd8x64& in); - // Returns either UNCLOSED_STRING or SUCCESS - simdjson_inline error_code finish(); +simdjson_inline bool is_ascii(const simd8x64& input) { + return input.reduce_or().is_ascii(); +} -private: - // Whether the last character of the previous iteration is part of a scalar token - // (anything except whitespace or a structural character/'operator'). - uint64_t prev_scalar = 0ULL; - json_string_scanner string_scanner{}; -}; +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80 + return is_third_byte | is_fourth_byte; +} +} // unnamed namespace +} // namespace lsx +} // namespace simdjson // -// Check if the current character immediately follows a matching character. +// Stage 2 // -// For example, this checks for quotes with backslashes in front of them: + // -// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// Implementation-specific overrides // -simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { - const uint64_t result = match << 1 | overflow; - overflow = match >> 63; - return result; +namespace simdjson { +namespace lsx { + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return lsx::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); } -simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { - json_string_block strings = string_scanner.next(in); - // identifies the white-space and the structural characters - json_character_block characters = json_character_block::classify(in); - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). - // - // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) - // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential - // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we - // may need to add an extra check when parsing strings. - // - // Performance: there are many ways to skin this cat. - const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); - uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); - // We are returning a function-local object so either we get a move constructor - // or we get copy elision. - return json_block( - strings,// strings is a function-local object so either it moves or the copy is elided. - characters, - follows_nonquote_scalar - ); +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return lsx::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); } -simdjson_inline error_code json_scanner::finish() { - return string_scanner.finish(); +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return lsx::stage1::generic_validate_utf8(buf,len); } -} // namespace stage1 -} // unnamed namespace -} // namespace ppc64 +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept { + return lsx::stringparsing::parse_string(src, dst, allow_replacement); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return lsx::stringparsing::parse_wobbly_string(src, dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H -/* end file generic/stage1/json_scanner.h for ppc64 */ +/* including simdjson/lsx/end.h: #include */ +/* begin file simdjson/lsx/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// All other declarations -/* including generic/stage1/find_next_document_index.h for ppc64: #include */ -/* begin file generic/stage1/find_next_document_index.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "lsx" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/lsx/end.h */ + +#endif // SIMDJSON_SRC_LSX_CPP +/* end file lsx.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_LASX +/* including lasx.cpp: #include */ +/* begin file lasx.cpp */ +#ifndef SIMDJSON_SRC_LASX_CPP +#define SIMDJSON_SRC_LASX_CPP /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -namespace simdjson { -namespace ppc64 { -namespace { -namespace stage1 { +/* including simdjson/lasx.h: #include */ +/* begin file simdjson/lasx.h */ +#ifndef SIMDJSON_LASX_H +#define SIMDJSON_LASX_H + +/* including simdjson/lasx/begin.h: #include "simdjson/lasx/begin.h" */ +/* begin file simdjson/lasx/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "lasx" */ +#define SIMDJSON_IMPLEMENTATION lasx +/* including simdjson/lasx/base.h: #include "simdjson/lasx/base.h" */ +/* begin file simdjson/lasx/base.h */ +#ifndef SIMDJSON_LASX_BASE_H +#define SIMDJSON_LASX_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +namespace simdjson { /** - * This algorithm is used to quickly identify the last structural position that - * makes up a complete document. - * - * It does this by going backwards and finding the last *document boundary* (a - * place where one value follows another without a comma between them). If the - * last document (the characters after the boundary) has an equal number of - * start and end brackets, it is considered complete. - * - * Simply put, we iterate over the structural characters, starting from - * the end. We consider that we found the end of a JSON document when the - * first element of the pair is NOT one of these characters: '{' '[' ':' ',' - * and when the second element is NOT one of these characters: '}' ']' ':' ','. - * - * This simple comparison works most of the time, but it does not cover cases - * where the batch's structural indexes contain a perfect amount of documents. - * In such a case, we do not have access to the structural index which follows - * the last document, therefore, we do not have access to the second element in - * the pair, and that means we cannot identify the last document. To fix this - * issue, we keep a count of the open and closed curly/square braces we found - * while searching for the pair. When we find a pair AND the count of open and - * closed curly/square braces is the same, we know that we just passed a - * complete document, therefore the last json buffer location is the end of the - * batch. - */ -simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { - // Variant: do not count separately, just figure out depth - if(parser.n_structural_indexes == 0) { return 0; } - auto arr_cnt = 0; - auto obj_cnt = 0; - for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { - auto idxb = parser.structural_indexes[i]; - switch (parser.buf[idxb]) { - case ':': - case ',': - continue; - case '}': - obj_cnt--; - continue; - case ']': - arr_cnt--; - continue; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - auto idxa = parser.structural_indexes[i - 1]; - switch (parser.buf[idxa]) { - case '{': - case '[': - case ':': - case ',': - continue; - } - // Last document is complete, so the next document will appear after! - if (!arr_cnt && !obj_cnt) { - return parser.n_structural_indexes; - } - // Last document is incomplete; mark the document at i + 1 as the next one - return i; - } - // If we made it to the end, we want to finish counting to see if we have a full document. - switch (parser.buf[parser.structural_indexes[0]]) { - case '}': - obj_cnt--; - break; - case ']': - arr_cnt--; - break; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - if (!arr_cnt && !obj_cnt) { - // We have a complete document. - return parser.n_structural_indexes; - } - return 0; -} + * Implementation for LASX. + */ +namespace lasx { -} // namespace stage1 +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd } // unnamed namespace -} // namespace ppc64 -} // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H -/* end file generic/stage1/find_next_document_index.h for ppc64 */ -/* including generic/stage1/json_minifier.h for ppc64: #include */ -/* begin file generic/stage1/json_minifier.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_BASE_H +/* end file simdjson/lasx/base.h */ +/* including simdjson/lasx/intrinsics.h: #include "simdjson/lasx/intrinsics.h" */ +/* begin file simdjson/lasx/intrinsics.h */ +#ifndef SIMDJSON_LASX_INTRINSICS_H +#define SIMDJSON_LASX_INTRINSICS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for LoongArch ASX"); + +#endif // SIMDJSON_LASX_INTRINSICS_H +/* end file simdjson/lasx/intrinsics.h */ +/* including simdjson/lasx/bitmanipulation.h: #include "simdjson/lasx/bitmanipulation.h" */ +/* begin file simdjson/lasx/bitmanipulation.h */ +#ifndef SIMDJSON_LASX_BITMANIPULATION_H +#define SIMDJSON_LASX_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmask.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lasx { namespace { -namespace stage1 { - -class json_minifier { -public: - template - static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; -private: - simdjson_inline json_minifier(uint8_t *_dst) - : dst{_dst} - {} - template - simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; - simdjson_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); - json_scanner scanner{}; - uint8_t *dst; -}; +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} -simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { - uint64_t mask = block.whitespace(); - dst += in.compress(mask, dst); +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); } -simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { - error_code error = scanner.finish(); - if (error) { dst_len = 0; return error; } - dst_len = dst - dst_start; - return SUCCESS; +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return __builtin_clzll(input_num); } -template<> -simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - simd::simd8x64 in_2(block_buf+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1); - this->next(in_2, block_2); - reader.advance(); +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return __lasx_xvpickve2gr_w(__lasx_xvpcnt_d(__m256i(v4u64{input_num, 0, 0, 0})), 0); } -template<> -simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - json_block block_1 = scanner.next(in_1); - this->next(block_buf, block_1); - reader.advance(); +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); } -template -error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { - buf_block_reader reader(buf, len); - json_minifier minifier(dst); +} // unnamed namespace +} // namespace lasx +} // namespace simdjson - // Index the first n-1 blocks - while (reader.has_full_block()) { - minifier.step(reader.full_block(), reader); - } +#endif // SIMDJSON_LASX_BITMANIPULATION_H +/* end file simdjson/lasx/bitmanipulation.h */ +/* including simdjson/lasx/bitmask.h: #include "simdjson/lasx/bitmask.h" */ +/* begin file simdjson/lasx/bitmask.h */ +#ifndef SIMDJSON_LASX_BITMASK_H +#define SIMDJSON_LASX_BITMASK_H - // Index the last (remainder) block, padded with spaces - uint8_t block[STEP_SIZE]; - size_t remaining_bytes = reader.get_remainder(block); - if (remaining_bytes > 0) { - // We do not want to write directly to the output stream. Rather, we write - // to a local buffer (for safety). - uint8_t out_block[STEP_SIZE]; - uint8_t * const guarded_dst{minifier.dst}; - minifier.dst = out_block; - minifier.step(block, reader); - size_t to_write = minifier.dst - out_block; - // In some cases, we could be enticed to consider the padded spaces - // as part of the string. This is fine as long as we do not write more - // than we consumed. - if(to_write > remaining_bytes) { to_write = remaining_bytes; } - memcpy(guarded_dst, out_block, to_write); - minifier.dst = guarded_dst + to_write; - } - return minifier.finish(dst, dst_len); +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; } -} // namespace stage1 } // unnamed namespace -} // namespace ppc64 +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H -/* end file generic/stage1/json_minifier.h for ppc64 */ -/* including generic/stage1/json_structural_indexer.h for ppc64: #include */ -/* begin file generic/stage1/json_structural_indexer.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +#endif +/* end file simdjson/lasx/bitmask.h */ +/* including simdjson/lasx/numberparsing_defs.h: #include "simdjson/lasx/numberparsing_defs.h" */ +/* begin file simdjson/lasx/numberparsing_defs.h */ +#ifndef SIMDJSON_LASX_NUMBERPARSING_DEFS_H +#define SIMDJSON_LASX_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) +#include namespace simdjson { -namespace ppc64 { -namespace { -namespace stage1 { +namespace lasx { +namespace numberparsing { -class bit_indexer { -public: - uint32_t *tail; +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} - simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); + return answer; +} -#if SIMDJSON_PREFER_REVERSE_BITS - /** - * ARM lacks a fast trailing zero instruction, but it has a fast - * bit reversal instruction and a fast leading zero instruction. - * Thus it may be profitable to reverse the bits (once) and then - * to rely on a sequence of instructions that call the leading - * zero instruction. - * - * Performance notes: - * The chosen routine is not optimal in terms of data dependency - * since zero_leading_bit might require two instructions. However, - * it tends to minimize the total number of instructions which is - * beneficial. - */ - simdjson_inline void write_index(uint32_t idx, uint64_t& rev_bits, int i) { - int lz = leading_zeroes(rev_bits); - this->tail[i] = static_cast(idx) + lz; - rev_bits = zero_leading_bit(rev_bits, lz); - } +} // namespace numberparsing +} // namespace lasx +} // namespace simdjson + +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 #else - /** - * Under recent x64 systems, we often have both a fast trailing zero - * instruction and a fast 'clear-lower-bit' instruction so the following - * algorithm can be competitive. - */ +#define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif +#endif - simdjson_inline void write_index(uint32_t idx, uint64_t& bits, int i) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - } -#endif // SIMDJSON_PREFER_REVERSE_BITS +#endif // SIMDJSON_LASX_NUMBERPARSING_DEFS_H +/* end file simdjson/lasx/numberparsing_defs.h */ +/* including simdjson/lasx/simd.h: #include "simdjson/lasx/simd.h" */ +/* begin file simdjson/lasx/simd.h */ +#ifndef SIMDJSON_LASX_SIMD_H +#define SIMDJSON_LASX_SIMD_H - template - simdjson_inline int write_indexes(uint32_t idx, uint64_t& bits) { - write_index(idx, bits, START); - SIMDJSON_IF_CONSTEXPR (N > 1) { - write_indexes<(N-1>0?START+1:START), (N-1>=0?N-1:1)>(idx, bits); +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m256i value; + + // Zero constructor + simdjson_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + simdjson_inline operator const v32i8&() const { return (v32i8&)this->value; } + simdjson_inline operator v32i8&() { return (v32i8&)this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return __lasx_xvor_v(*this, other); } + simdjson_inline Child operator&(const Child other) const { return __lasx_xvand_v(*this, other); } + simdjson_inline Child operator^(const Child other) const { return __lasx_xvxor_v(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return __lasx_xvandn_v(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return __lasx_xvseq_b(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + __m256i hi = __lasx_xvbsll_v(*this, N); + __m256i lo = __lasx_xvbsrl_v(*this, 16 - N); + __m256i tmp = __lasx_xvbsrl_v(prev_chunk, 16 - N); + lo = __lasx_xvpermi_q(lo, tmp, 0x21); + return __lasx_xvor_v(hi, lo); } - return START+N; - } + }; - template - simdjson_inline int write_indexes_stepped(uint32_t idx, uint64_t& bits, int cnt) { - write_indexes(idx, bits); - SIMDJSON_IF_CONSTEXPR ((START+STEP) < END) { - if (simdjson_unlikely((START+STEP) < cnt)) { - write_indexes_stepped<(START+STEP(idx, bits, cnt); - } + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return __lasx_xvreplgr2vr_b(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { + __m256i mask = __lasx_xvmskltz_b(*this); + return (__lasx_xvpickve2gr_w(mask, 4) << 16) | (__lasx_xvpickve2gr_w(mask, 0)); } - return ((END-START) % STEP) == 0 ? END : (END-START) - ((END-START) % STEP) + STEP; - } + simdjson_inline bool any() const { + __m256i v = __lasx_xvmsknz_b(*this); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; - // flatten out values in 'bits' assuming that they are are to have values of idx - // plus their position in the bitvector, and store these indexes at - // base_ptr[base] incrementing base as we go - // will potentially store extra values beyond end of valid bits, so base_ptr - // needs to be large enough to handle this - // - // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it - // will provide its own version of the code. -#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER - simdjson_inline void write(uint32_t idx, uint64_t bits); -#else - simdjson_inline void write(uint32_t idx, uint64_t bits) { - // In some instances, the next branch is expensive because it is mispredicted. - // Unfortunately, in other cases, - // it helps tremendously. - if (bits == 0) - return; + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { + return __lasx_xvreplgr2vr_b(_value); + } + static simdjson_inline simd8 zero() { return __lasx_xvldi(0); } + static simdjson_inline simd8 load(const T values[32]) { + return __lasx_xvld(reinterpret_cast(values), 0); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } - int cnt = static_cast(count_ones(bits)); + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} -#if SIMDJSON_PREFER_REVERSE_BITS - bits = reverse_bits(bits); -#endif -#ifdef SIMDJSON_STRUCTURAL_INDEXER_STEP - static constexpr const int STEP = SIMDJSON_STRUCTURAL_INDEXER_STEP; -#else - static constexpr const int STEP = 4; -#endif - static constexpr const int STEP_UNTIL = 24; + // Store to array + simdjson_inline void store(T dst[32]) const { + return __lasx_xvst(*this, reinterpret_cast<__m256i *>(dst), 0); + } - write_indexes_stepped<0, STEP_UNTIL, STEP>(idx, bits, cnt); - SIMDJSON_IF_CONSTEXPR (STEP_UNTIL < 64) { - if (simdjson_unlikely(STEP_UNTIL < cnt)) { - for (int i=STEP_UNTIL; i operator+(const simd8 other) const { return __lasx_xvadd_b(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return __lasx_xvsub_b(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return __lasx_xvshuf_b(lookup_table, lookup_table, *this); } - this->tail += cnt; - } -#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + template + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by haswell + // lasx do it in 4 steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask{1,2,3,4}] + // into a 256-bit register. + __m256i shufmask = {int64_t(thintable_epi8[mask1]), int64_t(thintable_epi8[mask2]) + 0x0808080808080808, int64_t(thintable_epi8[mask3]), int64_t(thintable_epi8[mask4]) + 0x0808080808080808}; + // this is the version "nearly pruned" + __m256i pruned = __lasx_xvshuf_b(*this, *this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop2 = BitsSetTable256mul2[mask2]; + int pop3 = BitsSetTable256mul2[mask3]; -}; + // then load the corresponding mask + __m256i masklo = __lasx_xvldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop1 * 8); + __m256i maskhi = __lasx_xvldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop3 * 8); + __m256i compactmask = __lasx_xvpermi_q(maskhi, masklo, 0x20); + __m256i answer = __lasx_xvshuf_b(pruned, pruned, compactmask); + __lasx_xvst(answer, reinterpret_cast(output), 0); + uint64_t value3 = __lasx_xvpickve2gr_du(answer, 2); + uint64_t value4 = __lasx_xvpickve2gr_du(answer, 3); + uint64_t *pos = reinterpret_cast(reinterpret_cast(output) + 16 - (pop1 + pop2) / 2); + pos[0] = value3; + pos[1] = value4; + } -class json_structural_indexer { -public: - /** - * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. - * - * @param partial Setting the partial parameter to true allows the find_structural_bits to - * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If - * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. - */ - template - static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; -private: - simdjson_inline json_structural_indexer(uint32_t *structural_indexes); - template - simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; - simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8({ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + }) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } - json_scanner scanner{}; - utf8_checker checker{}; - bit_indexer indexer; - uint64_t prev_structurals = 0; - uint64_t unescaped_chars_error = 0; -}; + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return __lasx_xvmax_b(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lasx_xvmin_b(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return __lasx_xvslt_b(other, *this); } + simdjson_inline simd8 operator<(const simd8 other) const { return __lasx_xvslt_b(*this, other); } + }; -simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(__m256i(v32u8{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + })) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -// Skip the last character if it is partial -simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { - if (simdjson_unlikely(len < 3)) { - switch (len) { - case 2: - if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left - return len; - case 1: - if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - return len; - case 0: - return len; + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return __lasx_xvsadd_bu(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return __lasx_xvssub_bu(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return __lasx_xvmax_bu(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lasx_xvmin_bu(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { + __m256i mask = __lasx_xvmskltz_b(*this); + return (0 == __lasx_xvpickve2gr_w(mask, 0)) && (0 == __lasx_xvpickve2gr_w(mask, 4)); } - } - if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left - return len; -} + simdjson_inline bool bits_not_set_anywhere() const { + __m256i v = __lasx_xvmsknz_b(*this); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + __m256i v = __lasx_xvmsknz_b(__lasx_xvand_v(*this, bits)); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(__lasx_xvsrli_b(*this, N)); } + template + simdjson_inline simd8 shl() const { return simd8(__lasx_xvslli_b(*this, N)); } + }; -// -// PERF NOTES: -// We pipe 2 inputs through these stages: -// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load -// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. -// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. -// The output of step 1 depends entirely on this information. These functions don't quite use -// up enough CPU: the second half of the functions is highly serial, only using 1 execution core -// at a time. The second input's scans has some dependency on the first ones finishing it, but -// they can make a lot of progress before they need that information. -// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that -// to finish: utf-8 checks and generating the output from the last iteration. -// -// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all -// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough -// workout. -// -template -error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { - if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } - // We guard the rest of the code so that we can assume that len > 0 throughout. - if (len == 0) { return EMPTY; } - if (is_streaming(partial)) { - len = trim_partial_utf8(buf, len); - // If you end up with an empty window after trimming - // the partial UTF-8 bytes, then chances are good that you - // have an UTF-8 formatting error. - if(len == 0) { return UTF8_ERROR; } - } - buf_block_reader reader(buf, len); - json_structural_indexer indexer(parser.structural_indexes.get()); + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, "LASX kernel should use two registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; - // Read all but the last block - while (reader.has_full_block()) { - indexer.step(reader.full_block(), reader); - } - // Take care of the last block (will always be there unless file is empty which is - // not supposed to happen.) - uint8_t block[STEP_SIZE]; - if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } - indexer.step(block, reader); - return indexer.finish(parser, reader.block_index(), len, partial); -} + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed -template<> -simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block); - simd::simd8x64 in_2(block+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1, reader.block_index()); - this->next(in_2, block_2, reader.block_index()+64); - reader.advance(); -} + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} -template<> -simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block); - json_block block_1 = scanner.next(in_1); - this->next(in_1, block_1, reader.block_index()); - reader.advance(); -} + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + __m256i zcnt = __lasx_xvpcnt_w(__m256i(v4u64{~mask, 0, 0, 0})); + uint64_t zcnt1 = __lasx_xvpickve2gr_wu(zcnt, 0); + uint64_t zcnt2 = __lasx_xvpickve2gr_wu(zcnt, 1); + // There should be a critical value which processes in scaler is faster. + if (zcnt1) + this->chunks[0].compress(mask1, output); + if (zcnt2) + this->chunks[1].compress(mask2, output + zcnt1); + return zcnt1 + zcnt2; + } -simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { - uint64_t unescaped = in.lteq(0x1F); -#if SIMDJSON_UTF8VALIDATION - checker.check_next_input(in); -#endif - indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser - prev_structurals = block.structural_start(); - unescaped_chars_error |= block.non_quote_inside_string(unescaped); -} + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + } -simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { - // Write out the final iteration's structurals - indexer.write(uint32_t(idx-64), prev_structurals); - error_code error = scanner.finish(); - // We deliberately break down the next expression so that it is - // human readable. - const bool should_we_exit = is_streaming(partial) ? - ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING - : (error != SUCCESS); // if partial is false, we must have SUCCESS - const bool have_unclosed_string = (error == UNCLOSED_STRING); - if (simdjson_unlikely(should_we_exit)) { return error; } + simdjson_inline uint64_t to_bitmask() const { + __m256i mask0 = __lasx_xvmskltz_b(this->chunks[0]); + __m256i mask1 = __lasx_xvmskltz_b(this->chunks[1]); + __m256i mask_tmp = __lasx_xvpickve_w(mask0, 4); + __m256i tmp = __lasx_xvpickve_w(mask1, 4); + mask0 = __lasx_xvinsve0_w(mask0, mask1, 1); + mask_tmp = __lasx_xvinsve0_w(mask_tmp, tmp, 1); + return __lasx_xvpickve2gr_du(__lasx_xvpackev_h(mask_tmp, mask0), 0); + } - if (unescaped_chars_error) { - return UNESCAPED_CHARS; - } - parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); - /*** - * The On Demand API requires special padding. - * - * This is related to https://github.com/simdjson/simdjson/issues/906 - * Basically, we want to make sure that if the parsing continues beyond the last (valid) - * structural character, it quickly stops. - * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. - * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing - * continues, then it must be [,] or }. - * Suppose it is ] or }. We backtrack to the first character, what could it be that would - * not trigger an error? It could be ] or } but no, because you can't start a document that way. - * It can't be a comma, a colon or any simple value. So the only way we could continue is - * if the repeated character is [. But if so, the document must start with [. But if the document - * starts with [, it should end with ]. If we enforce that rule, then we would get - * ][[ which is invalid. - * - * This is illustrated with the test array_iterate_unclosed_error() on the following input: - * R"({ "a": [,,)" - **/ - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final - parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); - parser.structural_indexes[parser.n_structural_indexes + 2] = 0; - parser.next_structural_index = 0; - // a valid JSON file cannot have zero structural indexes - we should have found something - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { - return EMPTY; - } - if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { - return UNEXPECTED_ERROR; - } - if (partial == stage1_mode::streaming_partial) { - // If we have an unclosed string, then the last structural - // will be the quote and we want to make sure to omit it. - if(have_unclosed_string) { - parser.n_structural_indexes--; - // a valid JSON file cannot have zero structural indexes - we should have found something - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + simdjson_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; } - // We truncate the input to the end of the last complete document (or zero). - auto new_structural_indexes = find_next_document_index(parser); - if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { - if(parser.structural_indexes[0] == 0) { - // If the buffer is partial and we started at index 0 but the document is - // incomplete, it's too big to parse. - return CAPACITY; - } else { - // It is possible that the document could be parsed, we just had a lot - // of white space. - parser.n_structural_indexes = 0; - return EMPTY; - } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); } - parser.n_structural_indexes = new_structural_indexes; - } else if (partial == stage1_mode::streaming_final) { - if(have_unclosed_string) { parser.n_structural_indexes--; } - // We truncate the input to the end of the last complete document (or zero). - // Because partial == stage1_mode::streaming_final, it means that we may - // silently ignore trailing garbage. Though it sounds bad, we do it - // deliberately because many people who have streams of JSON documents - // will truncate them for processing. E.g., imagine that you are uncompressing - // the data from a size file or receiving it in chunks from the network. You - // may not know where exactly the last document will be. Meanwhile the - // document_stream instances allow people to know the JSON documents they are - // parsing (see the iterator.source() method). - parser.n_structural_indexes = find_next_document_index(parser); - // We store the initial n_structural_indexes so that the client can see - // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, - // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, - // otherwise, it will copy some prior index. - parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; - // This next line is critical, do not change it unless you understand what you are - // doing. - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { - // We tolerate an unclosed string at the very end of the stream. Indeed, users - // often load their data in bulk without being careful and they want us to ignore - // the trailing garbage. - return EMPTY; + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); } - } - checker.check_eof(); - return checker.errors(); -} -} // namespace stage1 + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd } // unnamed namespace -} // namespace ppc64 +} // namespace lasx } // namespace simdjson -// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. -#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER - -#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H -/* end file generic/stage1/json_structural_indexer.h for ppc64 */ -/* including generic/stage1/utf8_validator.h for ppc64: #include */ -/* begin file generic/stage1/utf8_validator.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +#endif // SIMDJSON_LASX_SIMD_H +/* end file simdjson/lasx/simd.h */ +/* including simdjson/lasx/stringparsing_defs.h: #include "simdjson/lasx/stringparsing_defs.h" */ +/* begin file simdjson/lasx/stringparsing_defs.h */ +#ifndef SIMDJSON_LASX_STRINGPARSING_DEFS_H +#define SIMDJSON_LASX_STRINGPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lasx { namespace { -namespace stage1 { -/** - * Validates that the string is actual UTF-8. - */ -template -bool generic_validate_utf8(const uint8_t * input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - return c.errors() == error_code::SUCCESS; -} +using namespace simd; -bool generic_validate_utf8(const char * input, size_t length) { - return generic_validate_utf8(reinterpret_cast(input),length); +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + return { + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits + }; } -} // namespace stage1 } // unnamed namespace -} // namespace ppc64 +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H -/* end file generic/stage1/utf8_validator.h for ppc64 */ -/* end file generic/stage1/amalgamated.h for ppc64 */ -/* including generic/stage2/amalgamated.h for ppc64: #include */ -/* begin file generic/stage2/amalgamated.h for ppc64 */ -// Stuff other things depend on -/* including generic/stage2/base.h for ppc64: #include */ -/* begin file generic/stage2/base.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +#endif // SIMDJSON_LASX_STRINGPARSING_DEFS_H +/* end file simdjson/lasx/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/lasx/begin.h */ +/* including simdjson/generic/amalgamated.h for lasx: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for lasx */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for lasx: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for lasx */ +#ifndef SIMDJSON_GENERIC_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { -namespace { -namespace stage2 { +namespace lasx { -class json_iterator; -class structural_iterator; -struct tape_builder; -struct tape_writer; +struct open_container; +class dom_parser_implementation; -} // namespace stage2 -} // unnamed namespace -} // namespace ppc64 +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H -/* end file generic/stage2/base.h for ppc64 */ -/* including generic/stage2/tape_writer.h for ppc64: #include */ -/* begin file generic/stage2/tape_writer.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for lasx */ +/* including simdjson/generic/jsoncharutils.h for lasx: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for lasx */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include - namespace simdjson { -namespace ppc64 { +namespace lasx { namespace { -namespace stage2 { +namespace jsoncharutils { -struct tape_writer { - /** The next place to write to tape */ - uint64_t *next_tape_loc; +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} - /** Write a signed 64-bit value to tape. */ - simdjson_inline void append_s64(int64_t value) noexcept; +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} - /** Write an unsigned 64-bit value to tape. */ - simdjson_inline void append_u64(uint64_t value) noexcept; +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} - /** Write a double value to tape. */ - simdjson_inline void append_double(double value) noexcept; +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} - /** - * Append a tape entry (an 8-bit type,and 56 bits worth of value). - */ - simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif - /** - * Skip the current tape entry without writing. - * - * Used to skip the start of the container, since we'll come back later to fill it in when the - * container ends. - */ - simdjson_inline void skip() noexcept; +} // namespace jsoncharutils +} // unnamed namespace +} // namespace lasx +} // namespace simdjson - /** - * Skip the number of tape entries necessary to write a large u64 or i64. - */ - simdjson_inline void skip_large_integer() noexcept; +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for lasx */ +/* including simdjson/generic/atomparsing.h for lasx: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for lasx */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H - /** - * Skip the number of tape entries necessary to write a double. - */ - simdjson_inline void skip_double() noexcept; +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - /** - * Write a value to a known location on tape. - * - * Used to go back and write out the start of a container after the container ends. - */ - simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; +#include -private: - /** - * Append both the tape entry, and a supplementary value following it. Used for types that need - * all 64 bits, such as double and uint64_t. - */ - template - simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; -}; // struct tape_writer +namespace simdjson { +namespace lasx { +namespace { +/// @private +namespace atomparsing { -simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { - append2(0, value, internal::tape_type::INT64); -} +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } -simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { - append(0, internal::tape_type::UINT64); - *next_tape_loc = value; - next_tape_loc++; -} -/** Write a double value to tape. */ -simdjson_inline void tape_writer::append_double(double value) noexcept { - append2(0, value, internal::tape_type::DOUBLE); +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); } -simdjson_inline void tape_writer::skip() noexcept { - next_tape_loc++; +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } -simdjson_inline void tape_writer::skip_large_integer() noexcept { - next_tape_loc += 2; +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } } -simdjson_inline void tape_writer::skip_double() noexcept { - next_tape_loc += 2; +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } -simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { - *next_tape_loc = val | ((uint64_t(char(t))) << 56); - next_tape_loc++; +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } } -template -simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { - append(val, t); - static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); - memcpy(next_tape_loc, &val2, sizeof(val2)); - next_tape_loc++; +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } -simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { - tape_loc = val | ((uint64_t(char(t))) << 56); +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } } -} // namespace stage2 +} // namespace atomparsing } // unnamed namespace -} // namespace ppc64 +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H -/* end file generic/stage2/tape_writer.h for ppc64 */ -/* including generic/stage2/logger.h for ppc64: #include */ -/* begin file generic/stage2/logger.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for lasx */ +/* including simdjson/generic/dom_parser_implementation.h for lasx: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for lasx */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include +namespace simdjson { +namespace lasx { +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container -// This is for an internal-only stage 2 specific logger. -// Set LOG_ENABLED = true to log what stage 2 is doing! -namespace simdjson { -namespace ppc64 { -namespace { -namespace logger { +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); - static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; -#if SIMDJSON_VERBOSE_LOGGING - static constexpr const bool LOG_ENABLED = true; -#else - static constexpr const bool LOG_ENABLED = false; -#endif - static constexpr const int LOG_EVENT_LEN = 20; - static constexpr const int LOG_BUFFER_LEN = 30; - static constexpr const int LOG_SMALL_BUFFER_LEN = 10; - static constexpr const int LOG_INDEX_LEN = 5; + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; - static int log_depth; // Not threadsafe. Log only. + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); - // Helper to turn unprintable or newline characters into spaces - static simdjson_inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } - } +}; - // Print the header and set up log_start - static simdjson_inline void log_start() { - if (LOG_ENABLED) { - log_depth = 0; - printf("\n"); - printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); - printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); - } - } +} // namespace lasx +} // namespace simdjson - simdjson_unused static simdjson_inline void log_string(const char *message) { - if (LOG_ENABLED) { - printf("%s\n", message); - } - } +namespace simdjson { +namespace lasx { - // Logs a single line from the stage 2 DOM parser - template - static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { - if (LOG_ENABLED) { - printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); - auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; - auto next_index = structurals.next_structural; - auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); - auto next = &structurals.buf[*next_index]; - { - // Print the next N characters in the buffer. - printf("| "); - // Otherwise, print the characters starting from the buffer position. - // Print spaces for unprintable or newline characters. - for (int i=0;i SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; -#endif // SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H -/* end file generic/stage2/logger.h for ppc64 */ + _capacity = capacity; + return SUCCESS; +} -// All other declarations -/* including generic/stage2/json_iterator.h for ppc64: #include */ -/* begin file generic/stage2/json_iterator.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for lasx */ +/* including simdjson/generic/implementation_simdjson_result_base.h for lasx: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for lasx */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { -namespace { -namespace stage2 { +namespace lasx { -class json_iterator { -public: - const uint8_t* const buf; - uint32_t *next_structural; - dom_parser_implementation &dom_parser; - uint32_t depth{0}; +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { /** - * Walk the JSON document. - * - * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as - * the first parameter; some callbacks have other parameters as well: - * - * - visit_document_start() - at the beginning. - * - visit_document_end() - at the end (if things were successful). - * - * - visit_array_start() - at the start `[` of a non-empty array. - * - visit_array_end() - at the end `]` of a non-empty array. - * - visit_empty_array() - when an empty array is encountered. - * - * - visit_object_end() - at the start `]` of a non-empty object. - * - visit_object_start() - at the end `]` of a non-empty object. - * - visit_empty_object() - when an empty object is encountered. - * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is - * guaranteed to point at the first quote of the string (`"key"`). - * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. - * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. - * - * - increment_count(iter) - each time a value is found in an array or object. + * Create a new empty result with error = UNINITIALIZED. */ - template - simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + simdjson_inline implementation_simdjson_result_base() noexcept = default; /** - * Create an iterator capable of walking a JSON document. - * - * The document must have already passed through stage 1. + * Create a new error result. */ - simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; /** - * Look at the next token. - * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_inline const uint8_t *peek() const noexcept; - /** - * Advance to the next token. - * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). + * Create a new successful result. */ - simdjson_inline const uint8_t *advance() noexcept; + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + /** - * Get the remaining length of the document, from the start of the current token. + * Create a new result with both things (use if you don't want to branch when creating the result). */ - simdjson_inline size_t remaining_len() const noexcept; + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + /** - * Check if we are at the end of the document. + * Move the value and the error to the provided variables. * - * If this is true, there are no more tokens. + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. */ - simdjson_inline bool at_eof() const noexcept; + simdjson_inline void tie(T &value, error_code &error) && noexcept; + /** - * Check if we are at the beginning of the document. + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_inline bool at_beginning() const noexcept; - simdjson_inline uint8_t last_structural() const noexcept; + simdjson_inline error_code get(T &value) && noexcept; /** - * Log that a value has been found. - * - * Set LOG_ENABLED=true in logger.h to see logging. + * The error. */ - simdjson_inline void log_value(const char *type) const noexcept; + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + /** - * Log the start of a multipart value. + * Get the result value. * - * Set LOG_ENABLED=true in logger.h to see logging. + * @throw simdjson_error if there was an error. */ - simdjson_inline void log_start_value(const char *type) const noexcept; + simdjson_inline T& value() & noexcept(false); + /** - * Log the end of a multipart value. + * Take the result value (move it). * - * Set LOG_ENABLED=true in logger.h to see logging. + * @throw simdjson_error if there was an error. */ - simdjson_inline void log_end_value(const char *type) const noexcept; + simdjson_inline T&& value() && noexcept(false); + /** - * Log an error. + * Take the result value (move it). * - * Set LOG_ENABLED=true in logger.h to see logging. + * @throw simdjson_error if there was an error. */ - simdjson_inline void log_error(const char *error) const noexcept; - - template - simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; - template - simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; -}; - -template -simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { - logger::log_start(); - - // - // Start the document - // - if (at_eof()) { return EMPTY; } - log_start_value("document"); - SIMDJSON_TRY( visitor.visit_document_start(*this) ); - - // - // Read first value - // - { - auto value = advance(); - - // Make sure the outer object or array is closed before continuing; otherwise, there are ways we - // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 - if (!STREAMING) { - switch (*value) { - case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; - case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; - } - } - - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; - } - } - goto document_end; - -// -// Object parser states -// -object_begin: - log_start_value("object"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = false; - SIMDJSON_TRY( visitor.visit_object_start(*this) ); - - { - auto key = advance(); - if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } - SIMDJSON_TRY( visitor.increment_count(*this) ); - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - -object_field: - if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } - -object_continue: - switch (*advance()) { - case ',': - SIMDJSON_TRY( visitor.increment_count(*this) ); - { - auto key = advance(); - if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - goto object_field; - case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; - default: log_error("No comma between object fields"); return TAPE_ERROR; - } - -scope_end: - depth--; - if (depth == 0) { goto document_end; } - if (dom_parser.is_array[depth]) { goto array_continue; } - goto object_continue; - -// -// Array parser states -// -array_begin: - log_start_value("array"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = true; - SIMDJSON_TRY( visitor.visit_array_start(*this) ); - SIMDJSON_TRY( visitor.increment_count(*this) ); - -array_value: - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } - -array_continue: - switch (*advance()) { - case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; - case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; - default: log_error("Missing comma between array values"); return TAPE_ERROR; - } - -document_end: - log_end_value("document"); - SIMDJSON_TRY( visitor.visit_document_end(*this) ); - - dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); - - // If we didn't make it to the end, it's an error - if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { - log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); - return TAPE_ERROR; - } - - return SUCCESS; - -} // walk_document() - -simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) - : buf{_dom_parser.buf}, - next_structural{&_dom_parser.structural_indexes[start_structural_index]}, - dom_parser{_dom_parser} { -} - -simdjson_inline const uint8_t *json_iterator::peek() const noexcept { - return &buf[*(next_structural)]; -} -simdjson_inline const uint8_t *json_iterator::advance() noexcept { - return &buf[*(next_structural++)]; -} -simdjson_inline size_t json_iterator::remaining_len() const noexcept { - return dom_parser.len - *(next_structural-1); -} - -simdjson_inline bool json_iterator::at_eof() const noexcept { - return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; -} -simdjson_inline bool json_iterator::at_beginning() const noexcept { - return next_structural == dom_parser.structural_indexes.get(); -} -simdjson_inline uint8_t json_iterator::last_structural() const noexcept { - return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; -} - -simdjson_inline void json_iterator::log_value(const char *type) const noexcept { - logger::log_line(*this, "", type, ""); -} - -simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { - logger::log_line(*this, "+", type, ""); - if (logger::LOG_ENABLED) { logger::log_depth++; } -} + simdjson_inline T&& take_value() && noexcept(false); -simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { - if (logger::LOG_ENABLED) { logger::log_depth--; } - logger::log_line(*this, "-", type, ""); -} + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); -simdjson_inline void json_iterator::log_error(const char *error) const noexcept { - logger::log_line(*this, "", "ERROR", error); -} -template -simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { - switch (*value) { - case '"': return visitor.visit_root_string(*this, value); - case 't': return visitor.visit_root_true_atom(*this, value); - case 'f': return visitor.visit_root_false_atom(*this, value); - case 'n': return visitor.visit_root_null_atom(*this, value); - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return visitor.visit_root_number(*this, value); - default: - log_error("Document starts with a non-value character"); - return TAPE_ERROR; - } -} -template -simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { - // Use the fact that most scalars are going to be either strings or numbers. - if(*value == '"') { - return visitor.visit_string(*this, value); - } else if (((*value - '0') < 10) || (*value == '-')) { - return visitor.visit_number(*this, value); - } - // true, false, null are uncommon. - switch (*value) { - case 't': return visitor.visit_true_atom(*this, value); - case 'f': return visitor.visit_false_atom(*this, value); - case 'n': return visitor.visit_null_atom(*this, value); - default: - log_error("Non-value found when value was expected!"); - return TAPE_ERROR; - } -} +#endif // SIMDJSON_EXCEPTIONS -} // namespace stage2 -} // unnamed namespace -} // namespace ppc64 + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H -/* end file generic/stage2/json_iterator.h for ppc64 */ -/* including generic/stage2/stringparsing.h for ppc64: #include */ -/* begin file generic/stage2/stringparsing.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for lasx */ +/* including simdjson/generic/numberparsing.h for lasx: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for lasx */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// This file contains the common code every implementation uses -// It is intended to be included multiple times and compiled multiple times +#include +#include +#include namespace simdjson { -namespace ppc64 { -namespace { -/// @private -namespace stringparsing { - -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. +namespace lasx { +namespace numberparsing { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; +namespace { -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr, bool allow_replacement) { - // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) - constexpr uint32_t substitution_code_point = 0xfffd; - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} - // If we found a high surrogate, we must - // check for low surrogate for characters - // outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - const uint8_t *src_data = *src_ptr; - /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ - if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { - if(!allow_replacement) { return false; } - code_point = substitution_code_point; +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; } else { - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html - // We have already checked that the high surrogate is valid and - // (code_point - 0xd800) < 1024. - // - // Check that code_point_2 is in the range 0xdc00..0xdfff - // and that code_point_2 was parsed from valid hex. - uint32_t low_bit = code_point_2 - 0xdc00; - if (low_bit >> 10) { - if(!allow_replacement) { return false; } - code_point = substitution_code_point; - } else { - code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; - *src_ptr += 6; - } + // The fast path has now failed, so we are failing back on the slower path. - } - } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { - // If we encounter a low surrogate (not preceded by a high surrogate) - // then we have an error. - if(!allow_replacement) { return false; } - code_point = substitution_code_point; + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} -// handle a unicode codepoint using the wobbly convention -// https://simonsapin.github.io/wtf-8/ -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. // - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // If we found a high surrogate, we must - // check for low surrogate for characters - // outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - const uint8_t *src_data = *src_ptr; - /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ - if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); - uint32_t low_bit = code_point_2 - 0xdc00; - if ((low_bit >> 10) == 0) { - code_point = - (((code_point - 0xd800) << 10) | low_bit) + 0x10000; - *src_ptr += 6; - } - } - } + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximately equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; -/** - * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There - * must be an unescaped quote terminating the string. It returns the final output - * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large - * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + - * SIMDJSON_PADDING bytes. - */ -simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); - } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. */ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; - } + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. } -} + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); -simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { - // It is not ideal that this function is nearly identical to parse_string. - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint_wobbly(&src, &dst)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. */ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up } } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; } -} // namespace stringparsing -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. -#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H -/* end file generic/stage2/stringparsing.h for ppc64 */ -/* including generic/stage2/structural_iterator.h for ppc64: #include */ -/* begin file generic/stage2/structural_iterator.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. -namespace simdjson { -namespace ppc64 { -namespace { -namespace stage2 { + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} -class structural_iterator { -public: - const uint8_t* const buf; - uint32_t *next_structural; - dom_parser_implementation &dom_parser; +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} - // Start a structural - simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) - : buf{_dom_parser.buf}, - next_structural{&_dom_parser.structural_indexes[start_structural_index]}, - dom_parser{_dom_parser} { - } - // Get the buffer position of the current structural character - simdjson_inline const uint8_t* current() { - return &buf[*(next_structural-1)]; - } - // Get the current structural character - simdjson_inline char current_char() { - return buf[*(next_structural-1)]; - } - // Get the next structural character without advancing - simdjson_inline char peek_next_char() { - return buf[*next_structural]; - } - simdjson_inline const uint8_t* peek() { - return &buf[*next_structural]; - } - simdjson_inline const uint8_t* advance() { - return &buf[*(next_structural++)]; - } - simdjson_inline char advance_char() { - return buf[*(next_structural++)]; - } - simdjson_inline size_t remaining_len() { - return dom_parser.len - *(next_structural-1); +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} - simdjson_inline bool at_end() { - return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; } - simdjson_inline bool at_beginning() { - return next_structural == dom_parser.structural_indexes.get(); +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); } -}; + return SUCCESS; +} -} // namespace stage2 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well -#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H -/* end file generic/stage2/structural_iterator.h for ppc64 */ -/* including generic/stage2/tape_builder.h for ppc64: #include */ -/* begin file generic/stage2/tape_builder.h for ppc64 */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. -namespace simdjson { -namespace ppc64 { -namespace { -namespace stage2 { + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} -struct tape_builder { - template - simdjson_warn_unused static simdjson_inline error_code parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept; +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} - /** Called when a non-empty document starts. */ - simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; - /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} - /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; - /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; - /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; +} // unnamed namespace - /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; - /** - * Called when a key in a field is encountered. - * - * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array - * will be called after this with the field value. - */ - simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; - /** Called when a non-empty object ends. */ - simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; - /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} - /** - * Called when a string, number, boolean or null is found. - */ - simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; - /** - * Called when a string, number, boolean or null is found at the top level of a document (i.e. - * when there is no array or object and the entire document is a single string, number, boolean or - * null. - * - * This is separate from primitive() because simdjson's normal primitive parsing routines assume - * there is at least one more token after the value, which is only true in an array or object. - */ - simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} - simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); - simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING - /** Called each time a new field or element in an array or object is found. */ - simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} - /** Next location to write to tape */ - tape_writer tape; -private: - /** Next write location in the string buf for stage 2 parsing */ - uint8_t *current_string_buf_loc; +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else - simdjson_inline tape_builder(dom::document &doc) noexcept; +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); - simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; - simdjson_inline void start_container(json_iterator &iter) noexcept; - simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; - simdjson_inline void on_end_string(uint8_t *dst) noexcept; -}; // struct tape_builder + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } -template -simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept { - dom_parser.doc = &doc; - json_iterator iter(dom_parser, STREAMING ? dom_parser.next_structural_index : 0); - tape_builder builder(doc); - return iter.walk_document(builder); -} + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_root_primitive(*this, value); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_primitive(*this, value); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { - constexpr uint32_t start_tape_index = 0; - tape.append(start_tape_index, internal::tape_type::ROOT); - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { - return visit_string(iter, key, true); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 - return SUCCESS; -} +// Inlineable functions +namespace { -simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { - iter.log_value(key ? "key" : "string"); - uint8_t *dst = on_start_string(iter); - dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. - if (dst == nullptr) { - iter.log_error("Invalid escape in string"); - return STRING_ERROR; +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } - on_end_string(dst); - return SUCCESS; -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { - return visit_string(iter, value); + return i; } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("number"); - return numberparsing::parse_number(value, tape); -} -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; // - // We need to make a copy to make sure that the string is space terminated. - // This is not about padding the input, which should already padded up - // to len + SIMDJSON_PADDING. However, we have no control at this stage - // on how the padding was done. What if the input string was padded with nulls? - // It is quite common for an input string to have an extra null character (C string). - // We do not want to allow 9\0 (where \0 is the null character) inside a JSON - // document, but the string "9\0" by itself is fine. So we make a copy and - // pad the input with spaces when we know that there is just one input element. - // This copy is relatively expensive, but it will almost never be called in - // practice unless you are in the strange scenario where you have many JSON - // documents made of single atoms. + // Parse the integer part. // - std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); - if (copy.get() == nullptr) { return MEMALLOC; } - std::memcpy(copy.get(), value, iter.remaining_len()); - std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); - error_code error = visit_number(iter, copy.get()); - return error; -} + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } - tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; + return i; } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } - tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } -simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; } -// private: +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); -simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { - return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); -} + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } -simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - auto start_index = next_tape_index(iter); - tape.append(start_index+2, start); - tape.append(start_index, end); - return SUCCESS; + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; } -simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); - iter.dom_parser.open_containers[iter.depth].count = 0; - tape.skip(); // We don't actually *write* the start element until the end. -} +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); -simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - // Write the ending tape element, pointing at the start location - const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; - tape.append(start_tape_index, end); - // Write the start tape element, pointing at the end location (and including count) - // count can overflow if it exceeds 24 bits... so we saturate - // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). - const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; - const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); - return SUCCESS; -} + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } -simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { - // we advance the point, accounting for the fact that we have a NULL termination - tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); - return current_string_buf_loc + sizeof(uint32_t); + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; } -simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { - uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); - // TODO check for overflow in case someone has a crazy string (>=4GB?) - // But only add the overflow check when the document itself exceeds 4GB - // Currently unneeded because we refuse to parse docs larger or equal to 4GB. - memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); - // NULL termination is still handy if you expect all your strings to - // be NULL terminated? It comes at a small cost - *dst = 0; - current_string_buf_loc = dst + 1; -} +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; -} // namespace stage2 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } -#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H -/* end file generic/stage2/tape_builder.h for ppc64 */ -/* end file generic/stage2/amalgamated.h for ppc64 */ + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} -// -// Stage 1 -// -namespace simdjson { -namespace ppc64 { +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); -simdjson_warn_unused error_code implementation::create_dom_parser_implementation( - size_t capacity, - size_t max_depth, - std::unique_ptr& dst -) const noexcept { - dst.reset( new (std::nothrow) dom_parser_implementation() ); - if (!dst) { return MEMALLOC; } - if (auto err = dst->set_capacity(capacity)) - return err; - if (auto err = dst->set_max_depth(max_depth)) - return err; - return SUCCESS; -} + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); -namespace { + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } -using namespace simd; + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; -simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { - const simd8 table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); - const simd8 table2(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - simd8x64 v( - (in.chunks[0] & 0xf).lookup_16(table1) & (in.chunks[0].shr<4>()).lookup_16(table2), - (in.chunks[1] & 0xf).lookup_16(table1) & (in.chunks[1].shr<4>()).lookup_16(table2), - (in.chunks[2] & 0xf).lookup_16(table1) & (in.chunks[2].shr<4>()).lookup_16(table2), - (in.chunks[3] & 0xf).lookup_16(table1) & (in.chunks[3].shr<4>()).lookup_16(table2) - ); + exponent += exp_neg ? 0-exp : exp; + } - uint64_t op = simd8x64( - v.chunks[0].any_bits_set(0x7), - v.chunks[1].any_bits_set(0x7), - v.chunks[2].any_bits_set(0x7), - v.chunks[3].any_bits_set(0x7) - ).to_bitmask(); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - uint64_t whitespace = simd8x64( - v.chunks[0].any_bits_set(0x18), - v.chunks[1].any_bits_set(0x18), - v.chunks[2].any_bits_set(0x18), - v.chunks[3].any_bits_set(0x18) - ).to_bitmask(); + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - return { whitespace, op }; + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; } -simdjson_inline bool is_ascii(const simd8x64& input) { - // careful: 0x80 is not ascii. - return input.reduce_or().saturating_sub(0x7fu).bits_not_set_anywhere(); +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); } -simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 - simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. - return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; } -simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80 - simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80 - return is_third_byte | is_fourth_byte; +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). + static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; } -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson - -// -// Stage 2 -// +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); -// -// Implementation-specific overrides -// -namespace simdjson { -namespace ppc64 { + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } -simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { - return ppc64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); -} + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); -simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { - this->buf = _buf; - this->len = _len; - return ppc64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); -} + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } -simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return ppc64::stage1::generic_validate_utf8(buf,len); -} + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; -simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } -simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} + exponent += exp_neg ? 0-exp : exp; + } -simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { - return ppc64::stringparsing::parse_string(src, dst, replacement_char); -} + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } -simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { - return ppc64::stringparsing::parse_wobbly_string(src, dst); -} + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; -simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { - auto error = stage1(_buf, _len, stage1_mode::regular); - if (error) { return error; } - return stage2(_doc); + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; } -} // namespace ppc64 -} // namespace simdjson +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; -/* including simdjson/ppc64/end.h: #include */ -/* begin file simdjson/ppc64/end.h */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } -#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT -/* undefining SIMDJSON_IMPLEMENTATION from "ppc64" */ -#undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/ppc64/end.h */ + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); -#endif // SIMDJSON_SRC_PPC64_CPP -/* end file ppc64.cpp */ -#endif -#if SIMDJSON_IMPLEMENTATION_WESTMERE -/* including westmere.cpp: #include */ -/* begin file westmere.cpp */ -#ifndef SIMDJSON_SRC_WESTMERE_CPP -#define SIMDJSON_SRC_WESTMERE_CPP + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; -/* including simdjson/westmere.h: #include */ -/* begin file simdjson/westmere.h */ -#ifndef SIMDJSON_WESTMERE_H -#define SIMDJSON_WESTMERE_H + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } -/* including simdjson/westmere/begin.h: #include "simdjson/westmere/begin.h" */ -/* begin file simdjson/westmere/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "westmere" */ -#define SIMDJSON_IMPLEMENTATION westmere -/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ -/* begin file simdjson/westmere/base.h */ -#ifndef SIMDJSON_WESTMERE_BASE_H -#define SIMDJSON_WESTMERE_BASE_H + exponent += exp_neg ? 0-exp : exp; + } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + if (*p != '"') { return NUMBER_ERROR; } -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE -namespace simdjson { -/** - * Implementation for Westmere (Intel SSE4.2). - */ -namespace westmere { + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; -class implementation; + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} -namespace { -namespace simd { +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING -template struct simd8; -template struct simd8x64; +} // namespace numberparsing -} // namespace simd -} // unnamed namespace +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} -} // namespace westmere +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_WESTMERE_BASE_H -/* end file simdjson/westmere/base.h */ -/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ -/* begin file simdjson/westmere/intrinsics.h */ -#ifndef SIMDJSON_WESTMERE_INTRINSICS_H -#define SIMDJSON_WESTMERE_INTRINSICS_H +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for lasx */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for lasx: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#if SIMDJSON_VISUAL_STUDIO -// under clang within visual studio, this will include -#include // visual studio or clang -#else -#include // elsewhere -#endif // SIMDJSON_VISUAL_STUDIO - - -#if SIMDJSON_CLANG_VISUAL_STUDIO -/** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - */ -#include // for _mm_alignr_epi8 -#include // for _mm_clmulepi64_si128 -#endif +namespace simdjson { +namespace lasx { -static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); +// +// internal::implementation_simdjson_result_base inline implementation +// -#endif // SIMDJSON_WESTMERE_INTRINSICS_H -/* end file simdjson/westmere/intrinsics.h */ +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} -#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE -SIMDJSON_TARGET_REGION("sse4.2,pclmul,popcnt") -#endif +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} -/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ -/* begin file simdjson/westmere/bitmanipulation.h */ -#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H -#define SIMDJSON_WESTMERE_BITMANIPULATION_H +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_EXCEPTIONS -namespace simdjson { -namespace westmere { -namespace { +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -SIMDJSON_NO_SANITIZE_UNDEFINED -// This function can be used safely even if not all bytes have been -// initialized. -// See issue https://github.com/simdjson/simdjson/issues/1965 -SIMDJSON_NO_SANITIZE_MEMORY -simdjson_inline int trailing_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); } -/* result might be undefined when input_num is zero */ -simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return input_num & (input_num-1); +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); } -/* result might be undefined when input_num is zero */ -simdjson_inline int leading_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); } -#if SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows in this kernel - return __popcnt64(input_num);// Visual Studio wants two underscores +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; } -#else -simdjson_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; } -#endif -simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); -#else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); -#endif +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); } -} // unnamed namespace -} // namespace westmere -} // namespace simdjson +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for lasx */ +/* end file simdjson/generic/amalgamated.h for lasx */ +/* including simdjson/lasx/end.h: #include "simdjson/lasx/end.h" */ +/* begin file simdjson/lasx/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "lasx" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/lasx/end.h */ -#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H -/* end file simdjson/westmere/bitmanipulation.h */ -/* including simdjson/westmere/bitmask.h: #include "simdjson/westmere/bitmask.h" */ -/* begin file simdjson/westmere/bitmask.h */ -#ifndef SIMDJSON_WESTMERE_BITMASK_H -#define SIMDJSON_WESTMERE_BITMASK_H +#endif // SIMDJSON_LASX_H +/* end file simdjson/lasx.h */ +/* including simdjson/lasx/implementation.h: #include */ +/* begin file simdjson/lasx/implementation.h */ +#ifndef SIMDJSON_LASX_IMPLEMENTATION_H +#define SIMDJSON_LASX_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { -namespace { +namespace lasx { -// -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. -// -// For example, prefix_xor(00100100) == 00011100 -// -simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { - // There should be no such thing with a processing supporting avx2 - // but not clmul. - __m128i all_ones = _mm_set1_epi8('\xFF'); - __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); - return _mm_cvtsi128_si64(result); -} +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("lasx", "LoongArch ASX", internal::instruction_set::LASX) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; -} // unnamed namespace -} // namespace westmere +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_WESTMERE_BITMASK_H -/* end file simdjson/westmere/bitmask.h */ -/* including simdjson/westmere/numberparsing_defs.h: #include "simdjson/westmere/numberparsing_defs.h" */ -/* begin file simdjson/westmere/numberparsing_defs.h */ -#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H -#define SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +#endif // SIMDJSON_LASX_IMPLEMENTATION_H +/* end file simdjson/lasx/implementation.h */ -/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ -/* begin file simdjson/westmere/base.h */ -#ifndef SIMDJSON_WESTMERE_BASE_H -#define SIMDJSON_WESTMERE_BASE_H +/* including simdjson/lasx/begin.h: #include */ +/* begin file simdjson/lasx/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "lasx" */ +#define SIMDJSON_IMPLEMENTATION lasx +/* including simdjson/lasx/base.h: #include "simdjson/lasx/base.h" */ +/* begin file simdjson/lasx/base.h */ +#ifndef SIMDJSON_LASX_BASE_H +#define SIMDJSON_LASX_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE namespace simdjson { /** - * Implementation for Westmere (Intel SSE4.2). + * Implementation for LASX. */ -namespace westmere { +namespace lasx { class implementation; namespace { namespace simd { - template struct simd8; template struct simd8x64; - } // namespace simd } // unnamed namespace -} // namespace westmere +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_WESTMERE_BASE_H -/* end file simdjson/westmere/base.h */ -/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ -/* begin file simdjson/westmere/intrinsics.h */ -#ifndef SIMDJSON_WESTMERE_INTRINSICS_H -#define SIMDJSON_WESTMERE_INTRINSICS_H +#endif // SIMDJSON_LASX_BASE_H +/* end file simdjson/lasx/base.h */ +/* including simdjson/lasx/intrinsics.h: #include "simdjson/lasx/intrinsics.h" */ +/* begin file simdjson/lasx/intrinsics.h */ +#ifndef SIMDJSON_LASX_INTRINSICS_H +#define SIMDJSON_LASX_INTRINSICS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#if SIMDJSON_VISUAL_STUDIO -// under clang within visual studio, this will include -#include // visual studio or clang -#else -#include // elsewhere -#endif // SIMDJSON_VISUAL_STUDIO - - -#if SIMDJSON_CLANG_VISUAL_STUDIO -/** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - */ -#include // for _mm_alignr_epi8 -#include // for _mm_clmulepi64_si128 -#endif +// This should be the correct header whether +// you use visual studio or other compilers. +#include -static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for LoongArch ASX"); -#endif // SIMDJSON_WESTMERE_INTRINSICS_H -/* end file simdjson/westmere/intrinsics.h */ +#endif // SIMDJSON_LASX_INTRINSICS_H +/* end file simdjson/lasx/intrinsics.h */ +/* including simdjson/lasx/bitmanipulation.h: #include "simdjson/lasx/bitmanipulation.h" */ +/* begin file simdjson/lasx/bitmanipulation.h */ +#ifndef SIMDJSON_LASX_BITMANIPULATION_H +#define SIMDJSON_LASX_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmask.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { -namespace numberparsing { +namespace lasx { +namespace { -/** @private */ -static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - // this actually computes *16* values so we are being wasteful. - const __m128i ascii0 = _mm_set1_epi8('0'); - const __m128i mul_1_10 = - _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); - const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); - const __m128i mul_1_10000 = - _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); - const __m128i input = _mm_sub_epi8( - _mm_loadu_si128(reinterpret_cast(chars)), ascii0); - const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); - const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); - const __m128i t3 = _mm_packus_epi32(t2, t2); - const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); - return _mm_cvtsi128_si32( - t4); // only captures the sum of the first 8 digits, drop the rest +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); } -/** @private */ -simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { - internal::value128 answer; -#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); -#endif - return answer; +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); } -} // namespace numberparsing -} // namespace westmere -} // namespace simdjson +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return __builtin_clzll(input_num); +} -#define SIMDJSON_SWAR_NUMBER_PARSING 1 +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return __lasx_xvpickve2gr_w(__lasx_xvpcnt_d(__m256i(v4u64{input_num, 0, 0, 0})), 0); +} -#endif // SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H -/* end file simdjson/westmere/numberparsing_defs.h */ -/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ -/* begin file simdjson/westmere/simd.h */ -#ifndef SIMDJSON_WESTMERE_SIMD_H -#define SIMDJSON_WESTMERE_SIMD_H +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_BITMANIPULATION_H +/* end file simdjson/lasx/bitmanipulation.h */ +/* including simdjson/lasx/bitmask.h: #include "simdjson/lasx/bitmask.h" */ +/* begin file simdjson/lasx/bitmask.h */ +#ifndef SIMDJSON_LASX_BITMASK_H +#define SIMDJSON_LASX_BITMASK_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace { -namespace simd { - - template - struct base { - __m128i value; - - // Zero constructor - simdjson_inline base() : value{__m128i()} {} - - // Conversion from SIMD register - simdjson_inline base(const __m128i _value) : value(_value) {} - - // Conversion to SIMD register - simdjson_inline operator const __m128i&() const { return this->value; } - simdjson_inline operator __m128i&() { return this->value; } - - // Bit operations - simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } - simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } - simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } - simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } - simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } - }; - - template> - struct base8: base> { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - - simdjson_inline base8() : base>() {} - simdjson_inline base8(const __m128i _value) : base>(_value) {} - - friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } - - static const int SIZE = sizeof(base>::value); - - template - simdjson_inline simd8 prev(const simd8 prev_chunk) const { - return _mm_alignr_epi8(*this, prev_chunk, 16 - N); - } - }; - - // SIMD byte mask type (returned by things like eq and gt) - template<> - struct simd8: base8 { - static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } - - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m128i _value) : base8(_value) {} - // Splat constructor - simdjson_inline simd8(bool _value) : base8(splat(_value)) {} - - simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } - simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } - simdjson_inline simd8 operator~() const { return *this ^ true; } - }; - - template - struct base8_numeric: base8 { - static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } - static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } - static simdjson_inline simd8 load(const T values[16]) { - return _mm_loadu_si128(reinterpret_cast(values)); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_inline simd8 repeat_16( - T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } - - simdjson_inline base8_numeric() : base8() {} - simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} - - // Store to array - simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } - - // Override to distinguish from bool version - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } - - // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } - simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) - template - simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm_shuffle_epi8(lookup_table, *this); - } - - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). - // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes - // get written. - // Design consideration: it seems like a function with the - // signature simd8 compress(uint32_t mask) would be - // sensible, but the AVX ISA makes this kind of approach difficult. - template - simdjson_inline void compress(uint16_t mask, L * output) const { - using internal::thintable_epi8; - using internal::BitsSetTable256mul2; - using internal::pshufb_combine_table; - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); - // we increment by 0x08 the second half of the mask - shufmask = - _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); - // this is the version "nearly pruned" - __m128i pruned = _mm_shuffle_epi8(*this, shufmask); - // we still need to put the two halves together. - // we compute the popcount of the first half: - int pop1 = BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - __m128i compactmask = - _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); - __m128i answer = _mm_shuffle_epi8(pruned, compactmask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); - } - - template - simdjson_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, replace15 - )); - } - }; - - // Signed bytes - template<> - struct simd8 : base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) : simd8(_mm_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } - - // Order-sensitive comparisons - simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } - simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } - simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } - }; - - // Unsigned bytes - template<> - struct simd8: base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) : simd8(_mm_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } - - // Saturated math - simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } - simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } - - // Order-specific operations - simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - - // Bit-specific operations - simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } - simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } - simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } - simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } - simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } - template - simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } - template - simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } - // Get one of the bits and make a bitmask out of it. - // e.g. value.get_bit<7>() gets the high bit - template - simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } - }; - - template - struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64& o) = delete; // no copy allowed - simd8x64& operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} - - simdjson_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr+sizeof(simd8)*0); - this->chunks[1].store(ptr+sizeof(simd8)*1); - this->chunks[2].store(ptr+sizeof(simd8)*2); - this->chunks[3].store(ptr+sizeof(simd8)*3); - } - - simdjson_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); - } - - simdjson_inline uint64_t compress(uint64_t mask, T * output) const { - this->chunks[0].compress(uint16_t(mask), output); - this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); - this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); - this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); - return 64 - count_ones(mask); - } - - simdjson_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); - uint64_t r1 = this->chunks[1].to_bitmask() ; - uint64_t r2 = this->chunks[2].to_bitmask() ; - uint64_t r3 = this->chunks[3].to_bitmask() ; - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); - } - - simdjson_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] == mask, - this->chunks[1] == mask, - this->chunks[2] == mask, - this->chunks[3] == mask - ).to_bitmask(); - } - - simdjson_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64( - this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1], - this->chunks[2] == other.chunks[2], - this->chunks[3] == other.chunks[3] - ).to_bitmask(); - } - - simdjson_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] <= mask, - this->chunks[1] <= mask, - this->chunks[2] <= mask, - this->chunks[3] <= mask - ).to_bitmask(); - } - }; // struct simd8x64 -} // namespace simd -} // unnamed namespace -} // namespace westmere -} // namespace simdjson +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} -#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H -/* end file simdjson/westmere/simd.h */ -/* including simdjson/westmere/stringparsing_defs.h: #include "simdjson/westmere/stringparsing_defs.h" */ -/* begin file simdjson/westmere/stringparsing_defs.h */ -#ifndef SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H -#define SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +} // unnamed namespace +} // namespace lasx +} // namespace simdjson -/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ -/* begin file simdjson/westmere/bitmanipulation.h */ -#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H -#define SIMDJSON_WESTMERE_BITMANIPULATION_H +#endif +/* end file simdjson/lasx/bitmask.h */ +/* including simdjson/lasx/numberparsing_defs.h: #include "simdjson/lasx/numberparsing_defs.h" */ +/* begin file simdjson/lasx/numberparsing_defs.h */ +#ifndef SIMDJSON_LASX_NUMBERPARSING_DEFS_H +#define SIMDJSON_LASX_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include + namespace simdjson { -namespace westmere { -namespace { +namespace lasx { +namespace numberparsing { -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -SIMDJSON_NO_SANITIZE_UNDEFINED -// This function can be used safely even if not all bytes have been -// initialized. -// See issue https://github.com/simdjson/simdjson/issues/1965 -SIMDJSON_NO_SANITIZE_MEMORY -simdjson_inline int trailing_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); } -/* result might be undefined when input_num is zero */ -simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return input_num & (input_num-1); +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); + return answer; } -/* result might be undefined when input_num is zero */ -simdjson_inline int leading_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif// SIMDJSON_REGULAR_VISUAL_STUDIO -} +} // namespace numberparsing +} // namespace lasx +} // namespace simdjson -#if SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows in this kernel - return __popcnt64(input_num);// Visual Studio wants two underscores -} +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 #else -simdjson_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); -} +#define SIMDJSON_SWAR_NUMBER_PARSING 1 #endif - -simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); -#else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); #endif -} - -} // unnamed namespace -} // namespace westmere -} // namespace simdjson -#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H -/* end file simdjson/westmere/bitmanipulation.h */ -/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ -/* begin file simdjson/westmere/simd.h */ -#ifndef SIMDJSON_WESTMERE_SIMD_H -#define SIMDJSON_WESTMERE_SIMD_H +#endif // SIMDJSON_LASX_NUMBERPARSING_DEFS_H +/* end file simdjson/lasx/numberparsing_defs.h */ +/* including simdjson/lasx/simd.h: #include "simdjson/lasx/simd.h" */ +/* begin file simdjson/lasx/simd.h */ +#ifndef SIMDJSON_LASX_SIMD_H +#define SIMDJSON_LASX_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace { namespace simd { + // Forward-declared so they can be used by splat and friends. template struct base { - __m128i value; + __m256i value; // Zero constructor - simdjson_inline base() : value{__m128i()} {} + simdjson_inline base() : value{__m256i()} {} // Conversion from SIMD register - simdjson_inline base(const __m128i _value) : value(_value) {} + simdjson_inline base(const __m256i _value) : value(_value) {} // Conversion to SIMD register - simdjson_inline operator const __m128i&() const { return this->value; } - simdjson_inline operator __m128i&() { return this->value; } + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + simdjson_inline operator const v32i8&() const { return (v32i8&)this->value; } + simdjson_inline operator v32i8&() { return (v32i8&)this->value; } // Bit operations - simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } - simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } - simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } - simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child operator|(const Child other) const { return __lasx_xvor_v(*this, other); } + simdjson_inline Child operator&(const Child other) const { return __lasx_xvand_v(*this, other); } + simdjson_inline Child operator^(const Child other) const { return __lasx_xvxor_v(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return __lasx_xvandn_v(other, *this); } simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } }; + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + template> struct base8: base> { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - simdjson_inline base8() : base>() {} - simdjson_inline base8(const __m128i _value) : base>(_value) {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} - friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return __lasx_xvseq_b(lhs, rhs); } static const int SIZE = sizeof(base>::value); template simdjson_inline simd8 prev(const simd8 prev_chunk) const { - return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + __m256i hi = __lasx_xvbsll_v(*this, N); + __m256i lo = __lasx_xvbsrl_v(*this, 16 - N); + __m256i tmp = __lasx_xvbsrl_v(prev_chunk, 16 - N); + lo = __lasx_xvpermi_q(lo, tmp, 0x21); + return __lasx_xvor_v(hi, lo); } }; // SIMD byte mask type (returned by things like eq and gt) template<> struct simd8: base8 { - static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + static simdjson_inline simd8 splat(bool _value) { return __lasx_xvreplgr2vr_b(uint8_t(-(!!_value))); } - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m128i _value) : base8(_value) {} + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} // Splat constructor - simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} - simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } - simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline int to_bitmask() const { + __m256i mask = __lasx_xvmskltz_b(*this); + return (__lasx_xvpickve2gr_w(mask, 4) << 16) | (__lasx_xvpickve2gr_w(mask, 0)); + } + simdjson_inline bool any() const { + __m256i v = __lasx_xvmsknz_b(*this); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } simdjson_inline simd8 operator~() const { return *this ^ true; } }; template struct base8_numeric: base8 { - static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } - static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } - static simdjson_inline simd8 load(const T values[16]) { - return _mm_loadu_si128(reinterpret_cast(values)); + static simdjson_inline simd8 splat(T _value) { + return __lasx_xvreplgr2vr_b(_value); + } + static simdjson_inline simd8 zero() { return __lasx_xvldi(0); } + static simdjson_inline simd8 load(const T values[32]) { + return __lasx_xvld(reinterpret_cast(values), 0); } // Repeat 16 values as many times as necessary (usually for lookup tables) static simdjson_inline simd8 repeat_16( @@ -37181,68 +49102,73 @@ namespace simd { T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 ) { return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } simdjson_inline base8_numeric() : base8() {} - simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} // Store to array - simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } - - // Override to distinguish from bool version - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline void store(T dst[32]) const { + return __lasx_xvst(*this, reinterpret_cast<__m256i *>(dst), 0); + } // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8 operator+(const simd8 other) const { return __lasx_xvadd_b(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return __lasx_xvsub_b(*this, other); } simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm_shuffle_epi8(lookup_table, *this); + return __lasx_xvshuf_b(lookup_table, lookup_table, *this); } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes // get written. - // Design consideration: it seems like a function with the - // signature simd8 compress(uint32_t mask) would be - // sensible, but the AVX ISA makes this kind of approach difficult. template - simdjson_inline void compress(uint16_t mask, L * output) const { + simdjson_inline void compress(uint32_t mask, L * output) const { using internal::thintable_epi8; using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes + // this particular implementation was inspired by haswell + // lasx do it in 4 steps, first 8 bytes and then second 8 bytes... uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); - // we increment by 0x08 the second half of the mask - shufmask = - _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + uint8_t mask2 = uint8_t(mask >> 8); // second significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask{1,2,3,4}] + // into a 256-bit register. + __m256i shufmask = {int64_t(thintable_epi8[mask1]), int64_t(thintable_epi8[mask2]) + 0x0808080808080808, int64_t(thintable_epi8[mask3]), int64_t(thintable_epi8[mask4]) + 0x0808080808080808}; // this is the version "nearly pruned" - __m128i pruned = _mm_shuffle_epi8(*this, shufmask); - // we still need to put the two halves together. - // we compute the popcount of the first half: + __m256i pruned = __lasx_xvshuf_b(*this, *this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: int pop1 = BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - __m128i compactmask = - _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); - __m128i answer = _mm_shuffle_epi8(pruned, compactmask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + int pop2 = BitsSetTable256mul2[mask2]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + __m256i masklo = __lasx_xvldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop1 * 8); + __m256i maskhi = __lasx_xvldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop3 * 8); + __m256i compactmask = __lasx_xvpermi_q(maskhi, masklo, 0x20); + __m256i answer = __lasx_xvshuf_b(pruned, pruned, compactmask); + __lasx_xvst(answer, reinterpret_cast(output), 0); + uint64_t value3 = __lasx_xvpickve2gr_du(answer, 2); + uint64_t value4 = __lasx_xvpickve2gr_du(answer, 3); + uint64_t *pos = reinterpret_cast(reinterpret_cast(output) + 16 - (pop1 + pop2) / 2); + pos[0] = value3; + pos[1] = value4; } template @@ -37264,72 +49190,84 @@ namespace simd { template<> struct simd8 : base8_numeric { simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) : simd8(_mm_setr_epi8( + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8({ v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + }) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Order-sensitive comparisons - simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } - simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } - simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + simdjson_inline simd8 max_val(const simd8 other) const { return __lasx_xvmax_b(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lasx_xvmin_b(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return __lasx_xvslt_b(other, *this); } + simdjson_inline simd8 operator<(const simd8 other) const { return __lasx_xvslt_b(*this, other); } }; // Unsigned bytes template<> struct simd8: base8_numeric { simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) : simd8(_mm_setr_epi8( + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(__m256i(v32u8{ v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + })) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Saturated math - simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } - simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + simdjson_inline simd8 saturating_add(const simd8 other) const { return __lasx_xvsadd_bu(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return __lasx_xvssub_bu(*this, other); } // Order-specific operations - simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + simdjson_inline simd8 max_val(const simd8 other) const { return __lasx_xvmax_bu(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lasx_xvmin_bu(other, *this); } // Same as >, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) @@ -37337,84 +49275,91 @@ namespace simd { simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } // Bit-specific operations simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } - simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool is_ascii() const { + __m256i mask = __lasx_xvmskltz_b(*this); + return (0 == __lasx_xvpickve2gr_w(mask, 0)) && (0 == __lasx_xvpickve2gr_w(mask, 4)); + } + simdjson_inline bool bits_not_set_anywhere() const { + __m256i v = __lasx_xvmsknz_b(*this); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + __m256i v = __lasx_xvmsknz_b(__lasx_xvand_v(*this, bits)); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } template - simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } - template - simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } - // Get one of the bits and make a bitmask out of it. - // e.g. value.get_bit<7>() gets the high bit + simdjson_inline simd8 shr() const { return simd8(__lasx_xvsrli_b(*this, N)); } template - simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + simdjson_inline simd8 shl() const { return simd8(__lasx_xvslli_b(*this, N)); } }; template struct simd8x64 { static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + static_assert(NUM_CHUNKS == 2, "LASX kernel should use two registers per 64-byte block."); const simd8 chunks[NUM_CHUNKS]; simd8x64(const simd8x64& o) = delete; // no copy allowed simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + __m256i zcnt = __lasx_xvpcnt_w(__m256i(v4u64{~mask, 0, 0, 0})); + uint64_t zcnt1 = __lasx_xvpickve2gr_wu(zcnt, 0); + uint64_t zcnt2 = __lasx_xvpickve2gr_wu(zcnt, 1); + // There should be a critical value which processes in scaler is faster. + if (zcnt1) + this->chunks[0].compress(mask1, output); + if (zcnt2) + this->chunks[1].compress(mask2, output + zcnt1); + return zcnt1 + zcnt2; + } simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr+sizeof(simd8)*0); this->chunks[1].store(ptr+sizeof(simd8)*1); - this->chunks[2].store(ptr+sizeof(simd8)*2); - this->chunks[3].store(ptr+sizeof(simd8)*3); - } - - simdjson_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); } - simdjson_inline uint64_t compress(uint64_t mask, T * output) const { - this->chunks[0].compress(uint16_t(mask), output); - this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); - this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); - this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); - return 64 - count_ones(mask); + simdjson_inline uint64_t to_bitmask() const { + __m256i mask0 = __lasx_xvmskltz_b(this->chunks[0]); + __m256i mask1 = __lasx_xvmskltz_b(this->chunks[1]); + __m256i mask_tmp = __lasx_xvpickve_w(mask0, 4); + __m256i tmp = __lasx_xvpickve_w(mask1, 4); + mask0 = __lasx_xvinsve0_w(mask0, mask1, 1); + mask_tmp = __lasx_xvinsve0_w(mask_tmp, tmp, 1); + return __lasx_xvpickve2gr_du(__lasx_xvpackev_h(mask_tmp, mask0), 0); } - simdjson_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); - uint64_t r1 = this->chunks[1].to_bitmask() ; - uint64_t r2 = this->chunks[2].to_bitmask() ; - uint64_t r3 = this->chunks[3].to_bitmask() ; - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + simdjson_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; } simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] == mask, - this->chunks[1] == mask, - this->chunks[2] == mask, - this->chunks[3] == mask + this->chunks[1] == mask ).to_bitmask(); } simdjson_inline uint64_t eq(const simd8x64 &other) const { return simd8x64( this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1], - this->chunks[2] == other.chunks[2], - this->chunks[3] == other.chunks[3] + this->chunks[1] == other.chunks[1] ).to_bitmask(); } @@ -37422,23 +49367,31 @@ namespace simd { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] <= mask, - this->chunks[1] <= mask, - this->chunks[2] <= mask, - this->chunks[3] <= mask + this->chunks[1] <= mask ).to_bitmask(); } }; // struct simd8x64 } // namespace simd } // unnamed namespace -} // namespace westmere +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H -/* end file simdjson/westmere/simd.h */ +#endif // SIMDJSON_LASX_SIMD_H +/* end file simdjson/lasx/simd.h */ +/* including simdjson/lasx/stringparsing_defs.h: #include "simdjson/lasx/stringparsing_defs.h" */ +/* begin file simdjson/lasx/stringparsing_defs.h */ +#ifndef SIMDJSON_LASX_STRINGPARSING_DEFS_H +#define SIMDJSON_LASX_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace { using namespace simd; @@ -37462,2610 +49415,2936 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin // this can read up to 31 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); - simd8 v0(src); - simd8 v1(src + 16); - v0.store(dst); - v1.store(dst + 16); - uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + simd8 v(src); + v.store(dst); return { - uint32_t(bs_and_quote), // bs_bits - uint32_t(bs_and_quote >> 32) // quote_bits + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits }; } } // unnamed namespace -} // namespace westmere +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_STRINGPARSING_DEFS_H +/* end file simdjson/lasx/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/lasx/begin.h */ +/* including generic/amalgamated.h for lasx: #include */ +/* begin file generic/amalgamated.h for lasx */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) +#error generic/dependencies.h must be included before generic/amalgamated.h! +#endif + +/* including generic/base.h for lasx: #include */ +/* begin file generic/base.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +struct json_character_block; + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_BASE_H +/* end file generic/base.h for lasx */ +/* including generic/dom_parser_implementation.h for lasx: #include */ +/* begin file generic/dom_parser_implementation.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Interface a dom parser implementation must fulfill +namespace simdjson { +namespace lasx { +namespace { + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); +simdjson_inline bool is_ascii(const simd8x64& input); + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file generic/dom_parser_implementation.h for lasx */ +/* including generic/json_character_block.h for lasx: #include */ +/* begin file generic/json_character_block.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); + + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + + uint64_t _whitespace; + uint64_t _op; +}; + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +/* end file generic/json_character_block.h for lasx */ +/* end file generic/amalgamated.h for lasx */ +/* including generic/stage1/amalgamated.h for lasx: #include */ +/* begin file generic/stage1/amalgamated.h for lasx */ +// Stuff other things depend on +/* including generic/stage1/base.h for lasx: #include */ +/* begin file generic/stage1/base.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { +namespace stage1 { + +class bit_indexer; +template +struct buf_block_reader; +struct json_block; +class json_minifier; +class json_scanner; +struct json_string_block; +class json_string_scanner; +class json_structural_indexer; + +} // namespace stage1 + +namespace utf8_validation { +struct utf8_checker; +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H +/* end file generic/stage1/base.h for lasx */ +/* including generic/stage1/buf_block_reader.h for lasx: #include */ +/* begin file generic/stage1/buf_block_reader.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lasx { +namespace { +namespace stage1 { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] <= ' ') { buf[i] = '_'; } + if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H -/* end file simdjson/westmere/stringparsing_defs.h */ -/* end file simdjson/westmere/begin.h */ -/* including simdjson/generic/amalgamated.h for westmere: #include "simdjson/generic/amalgamated.h" */ -/* begin file simdjson/generic/amalgamated.h for westmere */ -#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) -#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! -#endif - -/* including simdjson/generic/base.h for westmere: #include "simdjson/generic/base.h" */ -/* begin file simdjson/generic/base.h for westmere */ -#ifndef SIMDJSON_GENERIC_BASE_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +/* end file generic/stage1/buf_block_reader.h for lasx */ +/* including generic/stage1/json_escape_scanner.h for lasx: #include */ +/* begin file generic/stage1/json_escape_scanner.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ -/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ -/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ -/* amalgamation skipped (editor-only): #else */ -/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ -/* amalgamation skipped (editor-only): #endif */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { - -struct open_container; -class dom_parser_implementation; +namespace lasx { +namespace { +namespace stage1 { /** - * The type of a JSON number + * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). */ -enum class number_type { - floating_point_number=1, /// a binary64 number - signed_integer, /// a signed integer that fits in a 64-bit word using two's complement - unsigned_integer /// a positive integer larger or equal to 1<<63 -}; +struct json_escape_scanner { + /** The actual escape characters (the backslashes themselves). */ + uint64_t next_is_escaped = 0ULL; -} // namespace westmere -} // namespace simdjson + struct escaped_and_escape { + /** + * Mask of escaped characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 0100100010100101000 + * n \ \ n \ \ + * ``` + */ + uint64_t escaped; + /** + * Mask of escape characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 1001000101001010001 + * \ \ \ \ \ \ \ + * ``` + */ + uint64_t escape; + }; -#endif // SIMDJSON_GENERIC_BASE_H -/* end file simdjson/generic/base.h for westmere */ -/* including simdjson/generic/jsoncharutils.h for westmere: #include "simdjson/generic/jsoncharutils.h" */ -/* begin file simdjson/generic/jsoncharutils.h for westmere */ -#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + /** + * Get a mask of both escape and escaped characters (the characters following a backslash). + * + * @param potential_escape A mask of the character that can escape others (but could be + * escaped itself). e.g. block.eq('\\') + */ + simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT + if (!backslash) { return {next_escaped_without_backslashes(), 0}; } +#endif -namespace simdjson { -namespace westmere { -namespace { -namespace jsoncharutils { + // | | Mask (shows characters instead of 1's) | Depth | Instructions | + // |--------------------------------|----------------------------------------|-------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | + // | | ` even odd even odd odd` | | | + // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) + // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) + // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) + // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () + // (*) this is not needed until the next iteration + uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); + uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); + uint64_t escape = escape_and_terminal_code & backslash; + this->next_is_escaped = escape >> 63; + return {escaped, escape}; + } -// return non-zero if not a structural or whitespace char -// zero otherwise -simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace_negated[c]; -} +private: + static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; -simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace[c]; -} + simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { + uint64_t escaped = this->next_is_escaped; + this->next_is_escaped = 0; + return escaped; + } -// returns a value with the high 16 bits set if not valid -// otherwise returns the conversion of the 4 hex digits at src into the bottom -// 16 bits of the 32-bit return register -// -// see -// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ -static inline uint32_t hex_to_u32_nocheck( - const uint8_t *src) { // strictly speaking, static inline is a C-ism - uint32_t v1 = internal::digit_to_val32[630 + src[0]]; - uint32_t v2 = internal::digit_to_val32[420 + src[1]]; - uint32_t v3 = internal::digit_to_val32[210 + src[2]]; - uint32_t v4 = internal::digit_to_val32[0 + src[3]]; - return v1 | v2 | v3 | v4; -} + /** + * Returns a mask of the next escape characters (masking out escaped backslashes), along with + * any non-backslash escape codes. + * + * \n \\n \\\n \\\\n returns: + * \n \ \ \n \ \ + * 11 100 1011 10100 + * + * You are expected to mask out the first bit yourself if the previous block had a trailing + * escape. + * + * & the result with potential_escape to get just the escape characters. + * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. + */ + static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { + // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: + // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be + // inverted (\\\ would be 010 instead of 101). + // + // ``` + // string: | ____\\\\_\\\\_____ | + // maybe_escaped | ODD | \ \ \ \ | + // even-aligned ^^^ ^^^^ odd-aligned + // ``` + // + // Taking that into account, our basic strategy is: + // + // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for + // odd-aligned runs. + // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the + // odd bits in odd-aligned runs. + // 3. & with backslash to clean up any stray bits. + // runs are set to 0, and then XORing with "odd": + // + // | | Mask (shows characters instead of 1's) | Instructions | + // |--------------------------------|----------------------------------------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | + // | | ` even odd even odd odd` | + // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) + // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) + // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) + // -// given a code point cp, writes to c -// the utf-8 code, outputting the length in -// bytes, if the length is zero, the code point -// is invalid -// -// This can possibly be made faster using pdep -// and clz and table lookups, but JSON documents -// have few escaped code points, and the following -// function looks cheap. -// -// Note: we assume that surrogates are treated separately -// -simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { - if (cp <= 0x7F) { - c[0] = uint8_t(cp); - return 1; // ascii - } - if (cp <= 0x7FF) { - c[0] = uint8_t((cp >> 6) + 192); - c[1] = uint8_t((cp & 63) + 128); - return 2; // universal plane - // Surrogates are treated elsewhere... - //} //else if (0xd800 <= cp && cp <= 0xdfff) { - // return 0; // surrogates // could put assert here - } else if (cp <= 0xFFFF) { - c[0] = uint8_t((cp >> 12) + 224); - c[1] = uint8_t(((cp >> 6) & 63) + 128); - c[2] = uint8_t((cp & 63) + 128); - return 3; - } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed - c[0] = uint8_t((cp >> 18) + 240); - c[1] = uint8_t(((cp >> 12) & 63) + 128); - c[2] = uint8_t(((cp >> 6) & 63) + 128); - c[3] = uint8_t((cp & 63) + 128); - return 4; - } - // will return 0 when the code point was too large. - return 0; // bad r -} + // Escaped characters are characters following an escape. + uint64_t maybe_escaped = potential_escape << 1; -#if SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; -} -#endif + // To distinguish odd from even escape sequences, therefore, we turn on any *starting* + // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) + // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. + // - Odd runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. + // - All other odd bytes are 1, and even bytes are 0. + uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; + uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; -} // namespace jsoncharutils + // Now we flip all odd bytes back with xor. This: + // - Makes odd runs of backslashes go from 0000 to 1010 + // - Makes even runs of backslashes go from 1111 to 1010 + // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) + // - Resets all other bytes to 0 + return even_series_codes_and_odd_bits ^ ODD_BITS; + } +}; + +} // namespace stage1 } // unnamed namespace -} // namespace westmere +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H -/* end file simdjson/generic/jsoncharutils.h for westmere */ -/* including simdjson/generic/atomparsing.h for westmere: #include "simdjson/generic/atomparsing.h" */ -/* begin file simdjson/generic/atomparsing.h for westmere */ -#ifndef SIMDJSON_GENERIC_ATOMPARSING_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_escape_scanner.h for lasx */ +/* including generic/stage1/json_string_scanner.h for lasx: #include */ +/* begin file generic/stage1/json_string_scanner.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include - namespace simdjson { -namespace westmere { +namespace lasx { namespace { -/// @private -namespace atomparsing { +namespace stage1 { -// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. -// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot -// be certain that the character pointer will be properly aligned. -// You might think that using memcpy makes this function expensive, but you'd be wrong. -// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); -// to the compile-time constant 1936482662. -simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : + _escaped(escaped), _quote(quote), _in_string(in_string) {} + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } -// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. -// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. -simdjson_warn_unused -simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { - uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) - static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); - std::memcpy(&srcval, src, sizeof(uint32_t)); - return srcval ^ string_to_uint32(atom); -} + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-escaped ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src) { - return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_true_atom(src); } - else if (len == 4) { return !str4ncmp(src, "true"); } - else { return false; } -} +private: + // Scans for escape characters + json_escape_scanner escape_scanner{}; + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; +}; -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src) { - return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; -} +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. +// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = escape_scanner.next(backslash).escaped; + const uint64_t quote = in.eq('"') & ~escaped; -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { - if (len > 5) { return is_valid_false_atom(src); } - else if (len == 5) { return !str4ncmp(src+1, "alse"); } - else { return false; } -} + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src) { - return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; + // + // Check if we're still in a string at the end of the box so the next block will know + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block(escaped, quote, in_string); } -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_null_atom(src); } - else if (len == 4) { return !str4ncmp(src, "null"); } - else { return false; } +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; } -} // namespace atomparsing +} // namespace stage1 } // unnamed namespace -} // namespace westmere +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_GENERIC_ATOMPARSING_H -/* end file simdjson/generic/atomparsing.h for westmere */ -/* including simdjson/generic/dom_parser_implementation.h for westmere: #include "simdjson/generic/dom_parser_implementation.h" */ -/* begin file simdjson/generic/dom_parser_implementation.h for westmere */ -#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_string_scanner.h for lasx */ +/* including generic/stage1/utf8_lookup4_algorithm.h for lasx: #include */ +/* begin file generic/stage1/utf8_lookup4_algorithm.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { - -// expectation: sizeof(open_container) = 64/8. -struct open_container { - uint32_t tape_index; // where, on the tape, does the scope ([,{) begins - uint32_t count; // how many elements in the scope -}; // struct open_container +namespace lasx { +namespace { +namespace utf8_validation { -static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); +using namespace simd; -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - /** Tape location of each open { or [ */ - std::unique_ptr open_containers{}; - /** Whether each open container is a [ or { */ - std::unique_ptr is_array{}; - /** Buffer passed to stage 1 */ - const uint8_t *buf{}; - /** Length passed to stage 1 */ - size_t len{0}; - /** Document passed to stage 2 */ - dom::document *doc{}; + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ - inline dom_parser_implementation() noexcept; - inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; - inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, - simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; - simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; - simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; - simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; - inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; - inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; -private: - simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, -}; + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, -} // namespace westmere -} // namespace simdjson + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, -namespace simdjson { -namespace westmere { + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = must_be_2_3_continuation(prev2, prev3); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } -inline dom_parser_implementation::dom_parser_implementation() noexcept = default; -inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; -inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } -// Leaving these here so they can be inlined if so desired -inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } - // Stage 1 index output - size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; - structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); - if (!structural_indexes) { _capacity = 0; return MEMALLOC; } - structural_indexes[0] = 0; - n_structural_indexes = 0; + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; - _capacity = capacity; - return SUCCESS; -} + // + // Check whether the current bytes are valid UTF-8. + // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } -inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - // Stage 2 stacks - open_containers.reset(new (std::nothrow) open_container[max_depth]); - is_array.reset(new (std::nothrow) bool[max_depth]); - if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. + simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } - _max_depth = max_depth; - return SUCCESS; -} + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; + } -} // namespace westmere + }; // struct utf8_checker +} // namespace utf8_validation + +} // unnamed namespace +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H -/* end file simdjson/generic/dom_parser_implementation.h for westmere */ -/* including simdjson/generic/implementation_simdjson_result_base.h for westmere: #include "simdjson/generic/implementation_simdjson_result_base.h" */ -/* begin file simdjson/generic/implementation_simdjson_result_base.h for westmere */ -#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +/* end file generic/stage1/utf8_lookup4_algorithm.h for lasx */ +/* including generic/stage1/json_scanner.h for lasx: #include */ +/* begin file generic/stage1/json_scanner.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { +namespace { +namespace stage1 { -// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair -// so we can avoid inlining errors -// TODO reconcile these! /** - * The result of a simdjson operation that could fail. - * - * Gives the option of reading error codes, or throwing an exception by casting to the desired result. - * - * This is a base class for implementations that want to add functions to the result type for - * chaining. - * - * Override like: + * A block of scanned json, with information on operators and scalars. * - * struct simdjson_result : public internal::implementation_simdjson_result_base { - * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} - * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} - * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} - * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} - * // Your extra methods here - * } + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. * - * Then any method returning simdjson_result will be chainable with your methods. + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. */ -template -struct implementation_simdjson_result_base { - - /** - * Create a new empty result with error = UNINITIALIZED. - */ - simdjson_inline implementation_simdjson_result_base() noexcept = default; +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} /** - * Create a new error result. - */ - simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } - /** - * Create a new successful result. - */ - simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + // Helpers - /** - * Create a new result with both things (use if you don't want to branch when creating the result). - */ - simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } - /** - * Move the value and the error to the provided variables. - * - * @param value The variable to assign the value to. May not be set if there is an error. - * @param error The variable to assign the error to. Set to SUCCESS if there is no error. - */ - simdjson_inline void tie(T &value, error_code &error) && noexcept; + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) /** - * Move the value to the provided variable. - * - * @param value The variable to assign the value to. May not be set if there is an error. - */ - simdjson_inline error_code get(T &value) && noexcept; - + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } /** - * The error. - */ - simdjson_inline error_code error() const noexcept; - -#if SIMDJSON_EXCEPTIONS - + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } /** - * Get the result value. - * - * @throw simdjson_error if there was an error. + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. */ - simdjson_inline T& value() & noexcept(false); + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T&& value() && noexcept(false); +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T&& take_value() && noexcept(false); +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; - /** - * Cast to the value (will throw on error). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline operator T&&() && noexcept(false); +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} -#endif // SIMDJSON_EXCEPTIONS +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. + characters, + follows_nonquote_scalar + ); +} - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline const T& value_unsafe() const& noexcept; - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline T& value_unsafe() & noexcept; - /** - * Take the result value (move it). This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline T&& value_unsafe() && noexcept; -protected: - /** users should never directly access first and second. **/ - T first{}; /** Users should never directly access 'first'. **/ - error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ -}; // struct implementation_simdjson_result_base +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} -} // namespace westmere +} // namespace stage1 +} // unnamed namespace +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H -/* end file simdjson/generic/implementation_simdjson_result_base.h for westmere */ -/* including simdjson/generic/numberparsing.h for westmere: #include "simdjson/generic/numberparsing.h" */ -/* begin file simdjson/generic/numberparsing.h for westmere */ -#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +/* end file generic/stage1/json_scanner.h for lasx */ + +// All other declarations +/* including generic/stage1/find_next_document_index.h for lasx: #include */ +/* begin file generic/stage1/find_next_document_index.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include -#include -#include - namespace simdjson { -namespace westmere { -namespace numberparsing { - -#ifdef JSON_TEST_NUMBERS -#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) -#else -#define INVALID_NUMBER(SRC) (NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) -#endif - +namespace lasx { namespace { +namespace stage1 { -// Convert a mantissa, an exponent and a sign bit into an ieee64 double. -// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). -// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. -simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { - double d; - mantissa &= ~(1ULL << 52); - mantissa |= real_exponent << 52; - mantissa |= ((static_cast(negative)) << 63); - std::memcpy(&d, &mantissa, sizeof(d)); - return d; -} - -// Attempts to compute i * 10^(power) exactly; and if "negative" is -// true, negate the result. -// This function will only work in some cases, when it does not work, success is -// set to false. This should work *most of the time* (like 99% of the time). -// We assume that power is in the [smallest_power, -// largest_power] interval: the caller is responsible for this check. -simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { - // we start with a fast path - // It was described in - // Clinger WD. How to read floating point numbers accurately. - // ACM SIGPLAN Notices. 1990 -#ifndef FLT_EVAL_METHOD -#error "FLT_EVAL_METHOD should be defined, please include cfloat." -#endif -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - // We cannot be certain that x/y is rounded to nearest. - if (0 <= power && power <= 22 && i <= 9007199254740991) -#else - if (-22 <= power && power <= 22 && i <= 9007199254740991) -#endif - { - // convert the integer into a double. This is lossless since - // 0 <= i <= 2^53 - 1. - d = double(i); - // - // The general idea is as follows. - // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then - // 1) Both s and p can be represented exactly as 64-bit floating-point - // values - // (binary64). - // 2) Because s and p can be represented exactly as floating-point values, - // then s * p - // and s / p will produce correctly rounded values. - // - if (power < 0) { - d = d / simdjson::internal::power_of_ten[-power]; - } else { - d = d * simdjson::internal::power_of_ten[power]; +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; } - if (negative) { - d = -d; + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; } - return true; + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; } - // When 22 < power && power < 22 + 16, we could - // hope for another, secondary fast path. It was - // described by David M. Gay in "Correctly rounded - // binary-decimal and decimal-binary conversions." (1990) - // If you need to compute i * 10^(22 + x) for x < 16, - // first compute i * 10^x, if you know that result is exact - // (e.g., when i * 10^x < 2^53), - // then you can still proceed and do (i * 10^x) * 10^22. - // Is this worth your time? - // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) - // for this second fast path to work. - // If you you have 22 < power *and* power < 22 + 16, and then you - // optimistically compute "i * 10^(x-22)", there is still a chance that you - // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of - // this optimization maybe less common than we would like. Source: - // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ - // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} - // The fast path has now failed, so we are failing back on the slower path. +} // namespace stage1 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson - // In the slow path, we need to adjust i so that it is > 1<<63 which is always - // possible, except if i == 0, so we handle i == 0 separately. - if(i == 0) { - d = negative ? -0.0 : 0.0; - return true; - } +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for lasx */ +/* including generic/stage1/json_minifier.h for lasx: #include */ +/* begin file generic/stage1/json_minifier.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // The exponent is 1024 + 63 + power - // + floor(log(5**power)/log(2)). - // The 1024 comes from the ieee64 standard. - // The 63 comes from the fact that we use a 64-bit word. - // - // Computing floor(log(5**power)/log(2)) could be - // slow. Instead we use a fast function. - // - // For power in (-400,350), we have that - // (((152170 + 65536) * power ) >> 16); - // is equal to - // floor(log(5**power)/log(2)) + power when power >= 0 - // and it is equal to - // ceil(log(5**-power)/log(2)) + power when power < 0 - // - // The 65536 is (1<<16) and corresponds to - // (65536 * power) >> 16 ---> power - // - // ((152170 * power ) >> 16) is equal to - // floor(log(5**power)/log(2)) - // - // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). - // The 1<<16 value is a power of two; we could use a - // larger power of 2 if we wanted to. - // - int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) +namespace simdjson { +namespace lasx { +namespace { +namespace stage1 { - // We want the most significant bit of i to be 1. Shift if needed. - int lz = leading_zeroes(i); - i <<= lz; +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; +}; - // We are going to need to do some 64-bit arithmetic to get a precise product. - // We use a table lookup approach. - // It is safe because - // power >= smallest_power - // and power <= largest_power - // We recover the mantissa of the power, it has a leading 1. It is always - // rounded down. - // - // We want the most significant 64 bits of the product. We know - // this will be non-zero because the most significant bit of i is - // 1. - const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); - // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); - // Both i and power_of_five_128[index] have their most significant bit set to 1 which - // implies that the either the most or the second most significant bit of the product - // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason about the product: there - // is 0 or 1 leading zero in the product. +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} - // Unless the least significant 9 bits of the high (64-bit) part of the full - // product are all 1s, then we know that the most significant 55 bits are - // exact and no further work is needed. Having 55 bits is necessary because - // we need 53 bits for the mantissa but we have to have one rounding bit and - // we can waste a bit if the most significant bit of the product is zero. - if((firstproduct.high & 0x1FF) == 0x1FF) { - // We want to compute i * 5^q, but only care about the top 55 bits at most. - // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing - // the full computation is wasteful. So we do what is called a "truncated - // multiplication". - // We take the most significant 64-bits, and we put them in - // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q - // to the desired approximation using one multiplication. Sometimes it does not suffice. - // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and - // then we get a better approximation to i * 5^q. - // - // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat - // more complicated. - // - // There is an extra layer of complexity in that we need more than 55 bits of - // accuracy in the round-to-even scenario. - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); - firstproduct.low += secondproduct.high; - if(secondproduct.high > firstproduct.low) { firstproduct.high++; } - // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without - // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product - // is sufficiently accurate, and more computation is not needed. - } - uint64_t lower = firstproduct.low; - uint64_t upper = firstproduct.high; - // The final mantissa should be 53 bits with a leading 1. - // We shift it so that it occupies 54 bits with a leading 1. - /////// - uint64_t upperbit = upper >> 63; - uint64_t mantissa = upper >> (upperbit + 9); - lz += int(1 ^ upperbit); +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} - // Here we have mantissa < (1<<54). - int64_t real_exponent = exponent - lz; - if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 - if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = negative ? -0.0 : 0.0; - return true; - } - // next line is safe because -real_exponent + 1 < 0 - mantissa >>= -real_exponent + 1; - // Thankfully, we can't have both "round-to-even" and subnormals because - // "round-to-even" only occurs for powers close to 0. - mantissa += (mantissa & 1); // round up - mantissa >>= 1; - // There is a weird scenario where we don't have a subnormal but just. - // Suppose we start with 2.2250738585072013e-308, we end up - // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal - // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round - // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer - // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. - real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; - d = to_double(mantissa, real_exponent, negative); - return true; - } - // We have to round to even. The "to even" part - // is only a problem when we are right in between two floats - // which we guard against. - // If we have lots of trailing zeros, we may fall right between two - // floating-point values. - // - // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] - // times a power of two. That is, it is right between a number with binary significand - // m and another number with binary significand m+1; and it must be the case - // that it cannot be represented by a float itself. - // - // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. - // Recall that 10^q = 5^q * 2^q. - // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that - // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. - // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so - // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have - // 2^{53} x 5^{-q} < 2^{64}. - // Hence we have 5^{-q} < 2^{11}$ or q>= -4. - // - // We require lower <= 1 and not lower == 0 because we could not prove that - // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. - if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { - if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { - mantissa &= ~1; // flip it so that we do not round up - } - } +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} - mantissa += mantissa & 1; - mantissa >>= 1; +template<> +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} - // Here we have mantissa < (1<<53), unless there was an overflow - if (mantissa >= (1ULL << 53)) { - ////////// - // This will happen when parsing values such as 7.2057594037927933e+16 - //////// - mantissa = (1ULL << 52); - real_exponent++; +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); } - mantissa &= ~(1ULL << 52); - // we have to check that real_exponent is in range, otherwise we bail out - if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinite value!!! We could actually throw an error here if we could. - return false; + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; } - d = to_double(mantissa, real_exponent, negative); - return true; + return minifier.finish(dst, dst_len); } -// We call a fallback floating-point parser that might be slow. Note -// it will accept JSON numbers, but the JSON spec. is more restrictive so -// before you call parse_float_fallback, you need to have validated the input -// string with the JSON grammar. -// It will return an error (false) if the parsed number is infinite. -// The string parsing itself always succeeds. We know that there is at least -// one digit. -static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); - // We do not accept infinite values. +} // namespace stage1 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +/* end file generic/stage1/json_minifier.h for lasx */ +/* including generic/stage1/json_structural_indexer.h for lasx: #include */ +/* begin file generic/stage1/json_structural_indexer.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H -static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); - // We do not accept infinite values. +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) -// check quickly whether the next 8 chars are made of digits -// at a glance, it looks better than Mula's -// http://0x80.pl/articles/swar-digits-validate.html -simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { - uint64_t val; - // this can read up to 7 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); - std::memcpy(&val, chars, 8); - // a branchy method might be faster: - // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) - // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == - // 0x3030303030303030); - return (((val & 0xF0F0F0F0F0F0F0F0) | - (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == - 0x3333333333333333); -} +namespace simdjson { +namespace lasx { +namespace { +namespace stage1 { -template -SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_inline bool parse_digit(const uint8_t c, I &i) { - const uint8_t digit = static_cast(c - '0'); - if (digit > 9) { - return false; +class bit_indexer { +public: + uint32_t *tail; + + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + +#if SIMDJSON_PREFER_REVERSE_BITS + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. + * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + simdjson_inline void write_index(uint32_t idx, uint64_t& rev_bits, int i) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); } - // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication - i = 10 * i + digit; // might overflow, we will handle the overflow later - return true; -} +#else + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. - const uint8_t *const first_after_period = p; + simdjson_inline void write_index(uint32_t idx, uint64_t& bits, int i) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } +#endif // SIMDJSON_PREFER_REVERSE_BITS -#ifdef SIMDJSON_SWAR_NUMBER_PARSING -#if SIMDJSON_SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; + template + simdjson_inline int write_indexes(uint32_t idx, uint64_t& bits) { + write_index(idx, bits, START); + SIMDJSON_IF_CONSTEXPR (N > 1) { + write_indexes<(N-1>0?START+1:START), (N-1>=0?N-1:1)>(idx, bits); + } + return START+N; } -#endif // SIMDJSON_SWAR_NUMBER_PARSING -#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING - // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) - if (parse_digit(*p, i)) { ++p; } - while (parse_digit(*p, i)) { p++; } - exponent = first_after_period - p; - // Decimal without digits (123.) is illegal - if (exponent == 0) { - return INVALID_NUMBER(src); + + template + simdjson_inline int write_indexes_stepped(uint32_t idx, uint64_t& bits, int cnt) { + write_indexes(idx, bits); + SIMDJSON_IF_CONSTEXPR ((START+STEP) < END) { + if (simdjson_unlikely((START+STEP) < cnt)) { + write_indexes_stepped<(START+STEP(idx, bits, cnt); + } + } + return ((END-START) % STEP) == 0 ? END : (END-START) - ((END-START) % STEP) + STEP; } - return SUCCESS; -} -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { - // Exp Sign: -123.456e[-]78 - bool neg_exp = ('-' == *p); - if (neg_exp || '+' == *p) { p++; } // Skip + as well + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it + // will provide its own version of the code. +#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; + + int cnt = static_cast(count_ones(bits)); - // Exponent: -123.456e-[78] - auto start_exp = p; - int64_t exp_number = 0; - while (parse_digit(*p, exp_number)) { ++p; } - // It is possible for parse_digit to overflow. - // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. - // Thus we *must* check for possible overflow before we negate exp_number. +#if SIMDJSON_PREFER_REVERSE_BITS + bits = reverse_bits(bits); +#endif +#ifdef SIMDJSON_STRUCTURAL_INDEXER_STEP + static constexpr const int STEP = SIMDJSON_STRUCTURAL_INDEXER_STEP; +#else + static constexpr const int STEP = 4; +#endif + static constexpr const int STEP_UNTIL = 24; - // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into - // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may - // not oblige and may, in fact, generate two distinct paths in any case. It might be - // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off - // instructions for a simdjson_likely branch, an unconclusive gain. + write_indexes_stepped<0, STEP_UNTIL, STEP>(idx, bits, cnt); + SIMDJSON_IF_CONSTEXPR (STEP_UNTIL < 64) { + if (simdjson_unlikely(STEP_UNTIL < cnt)) { + for (int i=STEP_UNTIL; itail += cnt; } - // We have a valid positive exponent in exp_number at this point, except that - // it may have overflowed. +#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER - // If there were more than 18 digits, we may have overflowed the integer. We have to do - // something!!!! - if (simdjson_unlikely(p > start_exp+18)) { - // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow - while (*start_exp == '0') { start_exp++; } - // 19 digits could overflow int64_t and is kind of absurd anyway. We don't - // support exponents smaller than -999,999,999,999,999,999 and bigger - // than 999,999,999,999,999,999. - // We can truncate. - // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before - // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could - // truncate at 324. - // Note that there is no reason to fail per se at this point in time. - // E.g., 0e999999999999999999999 is a fine number. - if (p > start_exp+18) { exp_number = 999999999999999999; } - } - // At this point, we know that exp_number is a sane, positive, signed integer. - // It is <= 999,999,999,999,999,999. As long as 'exponent' is in - // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' - // is bounded in magnitude by the size of the JSON input, we are fine in this universe. - // To sum it up: the next line should never overflow. - exponent += (neg_exp ? -exp_number : exp_number); - return SUCCESS; -} +}; -simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { ++start; } - // we over-decrement by one when there is a '.' - return digit_count - size_t(start - start_digits); -} +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. + */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; -} // unnamed namespace +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); -/** @private */ -static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { - if (parse_float_fallback(src, answer)) { - return SUCCESS; - } - return INVALID_NUMBER(src); -} + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; -/** @private */ -template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon in practice. - // - // 9999999999999999999 < 2**64 so we can accommodate 19 digits. - // If we have a decimal separator, then digit_count - 1 is the number of digits, but we - // may not have a decimal separator! - if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { - // Ok, chances are good that we had an overflow! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer - // reference to it, it would force it to be stored in memory, preventing the compiler from - // picking it apart and putting into registers. i.e. if we pass it as reference, - // it gets slow. - double d; - error_code error = slow_float_parsing(src, &d); - writer.append_double(d); - return error; - } - // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other - // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 - // To future reader: we'd love if someone found a better way, or at least could explain this result! - if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { - // - // Important: smallest_power is such that it leads to a zero value. - // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero - // so something x 10^-343 goes to zero, but not so with something x 10^-342. - static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); - // - if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero - WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); - return SUCCESS; - } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. - return INVALID_NUMBER(src); +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; } } - double d; - if (!compute_float_64(exponent, i, negative, d)) { - // we are almost never going to get here. - if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } - } - WRITE_DOUBLE(d, src, writer); - return SUCCESS; + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; } -// for performance analysis, it is sometimes useful to skip parsing -#ifdef SIMDJSON_SKIPNUMBERPARSING +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 does not use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); -template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { - writer.append_s64(0); // always write zero - return SUCCESS; // always succeeds + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) + uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); } -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } -#else - -// parse the number at src -// define JSON_TEST_NUMBERS for unit testing -// -// It is assumed that the number is followed by a structural ({,},],[) character -// or a white space character. If that is not the case (e.g., when the JSON -// document is made of a single number), then it is necessary to copy the -// content and append a space before calling this function. -// -// Our objective is accurate parsing (ULP of 0) at high speed. -template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION + checker.check_next_input(in); +#endif + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? + ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } - // - // Handle floats if there is a . or e (or both) - // - int64_t exponent = 0; - bool is_float = false; - if ('.' == *p) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); - digit_count = int(p - start_digits); // used later to guard against overflows - } - if (('e' == *p) || ('E' == *p)) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + if (unescaped_chars_error) { + return UNESCAPED_CHARS; } - if (is_float) { - const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); - SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); - if (dirty_end) { return INVALID_NUMBER(src); } - return SUCCESS; + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On-Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. + * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; } - - // The longest negative 64-bit number is 19 digits. - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - size_t longest_digit_count = negative ? 19 : 20; - if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } - WRITE_INTEGER(~i+1, src, writer); - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } - // Write unsigned if it doesn't fit in a signed integer. - if (i > uint64_t(INT64_MAX)) { - WRITE_UNSIGNED(i, src, writer); - } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } } - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; + checker.check_eof(); + return checker.errors(); } -// Inlineable functions -namespace { +} // namespace stage1 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson -// This table can be used to characterize the final character of an integer -// string. For JSON structural character and allowable white space characters, -// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise -// we return NUMBER_ERROR. -// Optimization note: we could easily reduce the size of the table by half (to 128) -// at the cost of an extra branch. -// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): -static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); +// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. +#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER -const uint8_t integer_string_finisher[256] = { - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR}; +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +/* end file generic/stage1/json_structural_indexer.h for lasx */ +/* including generic/stage1/utf8_validator.h for lasx: #include */ +/* begin file generic/stage1/utf8_validator.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } +} // namespace stage1 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +/* end file generic/stage1/utf8_validator.h for lasx */ +/* end file generic/stage1/amalgamated.h for lasx */ +/* including generic/stage2/amalgamated.h for lasx: #include */ +/* begin file generic/stage2/amalgamated.h for lasx */ +// Stuff other things depend on +/* including generic/stage2/base.h for lasx: #include */ +/* begin file generic/stage2/base.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H - return i; -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +namespace simdjson { +namespace lasx { +namespace { +namespace stage2 { -// Parse any number from 0 to 18,446,744,073,709,551,615 -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } +class json_iterator; +class structural_iterator; +struct tape_builder; +struct tape_writer; - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } +} // namespace stage2 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } +#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +/* end file generic/stage2/base.h for lasx */ +/* including generic/stage2/tape_writer.h for lasx: #include */ +/* begin file generic/stage2/tape_writer.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H - return i; -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { - const uint8_t *p = src + 1; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +#include - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } +namespace simdjson { +namespace lasx { +namespace { +namespace stage2 { - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - // Note: we use src[1] and not src[0] because src[0] is the quote character in this - // instance. - if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; - return i; -} + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); } -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { - // - // Check for minus sign - // - if(src == src_end) { return NUMBER_ERROR; } - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } +/** Write a double value to tape. */ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; } -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += uint8_t(negative) + 1; +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = src; - uint64_t i = 0; - while (parse_digit(*src, i)) { src++; } +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(src - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*src)) { - // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(*src != '"') { return NUMBER_ERROR; } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; } -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += uint8_t(negative); +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); +} // namespace stage2 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = p-start_digits > 19; - } - } else { - overflow = p-src > 19; - } +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for lasx */ +/* including generic/stage2/logger.h for lasx: #include */ +/* begin file generic/stage2/logger.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } +#include - exponent += exp_neg ? 0-exp : exp; - } - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace lasx { +namespace { +namespace logger { - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), &d)) { - return NUMBER_ERROR; - } - return d; -} +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; -simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { - return (*src == '-'); -} + static int log_depth; // Not threadsafe. Log only. -simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += uint8_t(negative); - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } - return false; -} + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } -simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += uint8_t(negative); - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { - // We have an integer. - // If the number is negative and valid, it must be a signed integer. - if(negative) { return number_type::signed_integer; } - // We want values larger or equal to 9223372036854775808 to be unsigned - // integers, and the other values to be signed integers. - int digit_count = int(p - src); - if(digit_count >= 19) { - const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); - if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { - return number_type::unsigned_integer; + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { - if(src == src_end) { return NUMBER_ERROR; } - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += uint8_t(negative); +} // namespace logger +} // unnamed namespace +} // namespace lasx +} // namespace simdjson - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - if(p == src_end) { return NUMBER_ERROR; } - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while ((p != src_end) && parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } +#endif // SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H +/* end file generic/stage2/logger.h for lasx */ - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely((p != src_end) && (*p == '.'))) { - p++; - const uint8_t *start_decimal_digits = p; - if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); +// All other declarations +/* including generic/stage2/json_iterator.h for lasx: #include */ +/* begin file generic/stage2/json_iterator.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // - // Parse the exponent - // - if ((p != src_end) && (*p == 'e' || *p == 'E')) { - p++; - if(p == src_end) { return NUMBER_ERROR; } - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; +namespace simdjson { +namespace lasx { +namespace { +namespace stage2 { - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while ((p != src_end) && parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } +class json_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + uint32_t depth{0}; - exponent += exp_neg ? 0-exp : exp; - } + /** + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_end() - at the start `]` of a non-empty object. + * - visit_object_start() - at the end `]` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. + * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. + */ + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; - if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { - return NUMBER_ERROR; - } - return d; -} + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_error(const char *error) const noexcept; -simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += uint8_t(negative) + 1; + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); // - // Parse the decimal part. + // Start the document // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = p-start_digits > 19; - } - } else { - overflow = p-src > 19; - } + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); // - // Parse the exponent + // Read first value // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; + { + auto value = advance(); - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } - exponent += exp_neg ? 0-exp : exp; + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } } + goto document_end; - if (*p != '"') { return NUMBER_ERROR; } +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } } - if (!parse_float_fallback(src - uint8_t(negative), &d)) { - return NUMBER_ERROR; + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; } - return d; -} -} // unnamed namespace -#endif // SIMDJSON_SKIPNUMBERPARSING +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; -} // namespace numberparsing +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); -inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { - switch (type) { - case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; - case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; - case number_type::floating_point_number: out << "floating-point number (binary64)"; break; - default: SIMDJSON_UNREACHABLE(); +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; } - return out; -} + } -} // namespace westmere -} // namespace simdjson +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } -#endif // SIMDJSON_GENERIC_NUMBERPARSING_H -/* end file simdjson/generic/numberparsing.h for westmere */ +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); -/* including simdjson/generic/implementation_simdjson_result_base-inl.h for westmere: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } -namespace simdjson { -namespace westmere { + return SUCCESS; -// -// internal::implementation_simdjson_result_base inline implementation -// +} // walk_document() -template -simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { - error = this->second; - if (!error) { - value = std::forward>(*this).first; - } +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { } -template -simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { - error_code error; - std::forward>(*this).tie(value, error); - return error; +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; } - -template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { - return this->second; +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; } - -#if SIMDJSON_EXCEPTIONS - -template -simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return this->first; +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); } -template -simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { - return std::forward>(*this).take_value(); +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; } - -template -simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(this->first); +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; } -template -simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { - return std::forward>(*this).take_value(); +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); } -#endif // SIMDJSON_EXCEPTIONS +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} -template -simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { - return this->first; +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); } -template -simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { - return this->first; +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); } -template -simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { - return std::forward(this->first); +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + // Use the fact that most scalars are going to be either strings or numbers. + if(*value == '"') { + return visitor.visit_string(*this, value); + } else if (((*value - '0') < 10) || (*value == '-')) { + return visitor.visit_number(*this, value); + } + // true, false, null are uncommon. + switch (*value) { + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } } -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept - : first{std::forward(value)}, second{error} {} -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept - : implementation_simdjson_result_base(T{}, error) {} -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept - : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} - -} // namespace westmere +} // namespace stage2 +} // unnamed namespace +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H -/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for westmere */ -/* end file simdjson/generic/amalgamated.h for westmere */ -/* including simdjson/westmere/end.h: #include "simdjson/westmere/end.h" */ -/* begin file simdjson/westmere/end.h */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE -SIMDJSON_UNTARGET_REGION -#endif - -/* undefining SIMDJSON_IMPLEMENTATION from "westmere" */ -#undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/westmere/end.h */ - -#endif // SIMDJSON_WESTMERE_H -/* end file simdjson/westmere.h */ -/* including simdjson/westmere/implementation.h: #include */ -/* begin file simdjson/westmere/implementation.h */ -#ifndef SIMDJSON_WESTMERE_IMPLEMENTATION_H -#define SIMDJSON_WESTMERE_IMPLEMENTATION_H +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for lasx */ +/* including generic/stage2/stringparsing.h for lasx: #include */ +/* begin file generic/stage2/stringparsing.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE -namespace simdjson { -namespace westmere { - -/** - * @private - */ -class implementation final : public simdjson::implementation { -public: - simdjson_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t capacity, - size_t max_length, - std::unique_ptr& dst - ) const noexcept final; - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; - simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; -}; - -} // namespace westmere -} // namespace simdjson +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times -#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H -/* end file simdjson/westmere/implementation.h */ +namespace simdjson { +namespace lasx { +namespace { +/// @private +namespace stringparsing { -/* including simdjson/westmere/begin.h: #include */ -/* begin file simdjson/westmere/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "westmere" */ -#define SIMDJSON_IMPLEMENTATION westmere -/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ -/* begin file simdjson/westmere/base.h */ -#ifndef SIMDJSON_WESTMERE_BASE_H -#define SIMDJSON_WESTMERE_BASE_H +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE -namespace simdjson { -/** - * Implementation for Westmere (Intel SSE4.2). - */ -namespace westmere { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -class implementation; + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; -namespace { -namespace simd { +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; -template struct simd8; -template struct simd8x64; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); -} // namespace simd -} // unnamed namespace + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } -} // namespace westmere -} // namespace simdjson + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} -#endif // SIMDJSON_WESTMERE_BASE_H -/* end file simdjson/westmere/base.h */ -/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ -/* begin file simdjson/westmere/intrinsics.h */ -#ifndef SIMDJSON_WESTMERE_INTRINSICS_H -#define SIMDJSON_WESTMERE_INTRINSICS_H -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } -#if SIMDJSON_VISUAL_STUDIO -// under clang within visual studio, this will include -#include // visual studio or clang -#else -#include // elsewhere -#endif // SIMDJSON_VISUAL_STUDIO + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} -#if SIMDJSON_CLANG_VISUAL_STUDIO /** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_ptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. */ -#include // for _mm_alignr_epi8 -#include // for _mm_clmulepi64_si128 -#endif - -static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} -#endif // SIMDJSON_WESTMERE_INTRINSICS_H -/* end file simdjson/westmere/intrinsics.h */ +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } +} -#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE -SIMDJSON_TARGET_REGION("sse4.2,pclmul,popcnt") -#endif +} // namespace stringparsing +} // unnamed namespace +} // namespace lasx +} // namespace simdjson -/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ -/* begin file simdjson/westmere/bitmanipulation.h */ -#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H -#define SIMDJSON_WESTMERE_BITMANIPULATION_H +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for lasx */ +/* including generic/stage2/structural_iterator.h for lasx: #include */ +/* begin file generic/stage2/structural_iterator.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace { +namespace stage2 { -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -SIMDJSON_NO_SANITIZE_UNDEFINED -// This function can be used safely even if not all bytes have been -// initialized. -// See issue https://github.com/simdjson/simdjson/issues/1965 -SIMDJSON_NO_SANITIZE_MEMORY -simdjson_inline int trailing_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO -} - -/* result might be undefined when input_num is zero */ -simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return input_num & (input_num-1); -} - -/* result might be undefined when input_num is zero */ -simdjson_inline int leading_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif// SIMDJSON_REGULAR_VISUAL_STUDIO -} +class structural_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; -#if SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows in this kernel - return __popcnt64(input_num);// Visual Studio wants two underscores -} -#else -simdjson_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); -} -#endif + // Start a structural + simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { + } + // Get the buffer position of the current structural character + simdjson_inline const uint8_t* current() { + return &buf[*(next_structural-1)]; + } + // Get the current structural character + simdjson_inline char current_char() { + return buf[*(next_structural-1)]; + } + // Get the next structural character without advancing + simdjson_inline char peek_next_char() { + return buf[*next_structural]; + } + simdjson_inline const uint8_t* peek() { + return &buf[*next_structural]; + } + simdjson_inline const uint8_t* advance() { + return &buf[*(next_structural++)]; + } + simdjson_inline char advance_char() { + return buf[*(next_structural++)]; + } + simdjson_inline size_t remaining_len() { + return dom_parser.len - *(next_structural-1); + } -simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); -#else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); -#endif -} + simdjson_inline bool at_end() { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; + } + simdjson_inline bool at_beginning() { + return next_structural == dom_parser.structural_indexes.get(); + } +}; +} // namespace stage2 } // unnamed namespace -} // namespace westmere +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H -/* end file simdjson/westmere/bitmanipulation.h */ -/* including simdjson/westmere/bitmask.h: #include "simdjson/westmere/bitmask.h" */ -/* begin file simdjson/westmere/bitmask.h */ -#ifndef SIMDJSON_WESTMERE_BITMASK_H -#define SIMDJSON_WESTMERE_BITMASK_H +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +/* end file generic/stage2/structural_iterator.h for lasx */ +/* including generic/stage2/tape_builder.h for lasx: #include */ +/* begin file generic/stage2/tape_builder.h for lasx */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + namespace simdjson { -namespace westmere { +namespace lasx { namespace { +namespace stage2 { -// -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. -// -// For example, prefix_xor(00100100) == 00011100 -// -simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { - // There should be no such thing with a processing supporting avx2 - // but not clmul. - __m128i all_ones = _mm_set1_epi8('\xFF'); - __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); - return _mm_cvtsi128_si64(result); -} - -} // unnamed namespace -} // namespace westmere -} // namespace simdjson +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; -#endif // SIMDJSON_WESTMERE_BITMASK_H -/* end file simdjson/westmere/bitmask.h */ -/* including simdjson/westmere/numberparsing_defs.h: #include "simdjson/westmere/numberparsing_defs.h" */ -/* begin file simdjson/westmere/numberparsing_defs.h */ -#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H -#define SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; -/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ -/* begin file simdjson/westmere/base.h */ -#ifndef SIMDJSON_WESTMERE_BASE_H -#define SIMDJSON_WESTMERE_BASE_H + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE -namespace simdjson { -/** - * Implementation for Westmere (Intel SSE4.2). - */ -namespace westmere { + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. + * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. + */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; -class implementation; + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; -namespace { -namespace simd { + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; -template struct simd8; -template struct simd8x64; + /** Called each time a new field or element in an array or object is found. */ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; -} // namespace simd -} // unnamed namespace + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; -} // namespace westmere -} // namespace simdjson + simdjson_inline tape_builder(dom::document &doc) noexcept; -#endif // SIMDJSON_WESTMERE_BASE_H -/* end file simdjson/westmere/base.h */ -/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ -/* begin file simdjson/westmere/intrinsics.h */ -#ifndef SIMDJSON_WESTMERE_INTRINSICS_H -#define SIMDJSON_WESTMERE_INTRINSICS_H + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} -#if SIMDJSON_VISUAL_STUDIO -// under clang within visual studio, this will include -#include // visual studio or clang -#else -#include // elsewhere -#endif // SIMDJSON_VISUAL_STUDIO +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} -#if SIMDJSON_CLANG_VISUAL_STUDIO -/** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - */ -#include // for _mm_alignr_epi8 -#include // for _mm_clmulepi64_si128 -#endif +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} -static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} -#endif // SIMDJSON_WESTMERE_INTRINSICS_H -/* end file simdjson/westmere/intrinsics.h */ +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} -namespace simdjson { -namespace westmere { -namespace numberparsing { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} -/** @private */ -static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - // this actually computes *16* values so we are being wasteful. - const __m128i ascii0 = _mm_set1_epi8('0'); - const __m128i mul_1_10 = - _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); - const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); - const __m128i mul_1_10000 = - _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); - const __m128i input = _mm_sub_epi8( - _mm_loadu_si128(reinterpret_cast(chars)), ascii0); - const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); - const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); - const __m128i t3 = _mm_packus_epi32(t2, t2); - const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); - return _mm_cvtsi128_si32( - t4); // only captures the sum of the first 8 digits, drop the rest +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); } -/** @private */ -simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { - internal::value128 answer; -#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); -#endif - return answer; +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; } -} // namespace numberparsing -} // namespace westmere -} // namespace simdjson +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} -#define SIMDJSON_SWAR_NUMBER_PARSING 1 +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} -#endif // SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H -/* end file simdjson/westmere/numberparsing_defs.h */ -/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ -/* begin file simdjson/westmere/simd.h */ -#ifndef SIMDJSON_WESTMERE_SIMD_H -#define SIMDJSON_WESTMERE_SIMD_H +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} -namespace simdjson { -namespace westmere { -namespace { -namespace simd { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} - template - struct base { - __m128i value; +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} - // Zero constructor - simdjson_inline base() : value{__m128i()} {} +// private: - // Conversion from SIMD register - simdjson_inline base(const __m128i _value) : value(_value) {} +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} - // Conversion to SIMD register - simdjson_inline operator const __m128i&() const { return this->value; } - simdjson_inline operator __m128i&() { return this->value; } +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} - // Bit operations - simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } - simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } - simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } - simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } - simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } - }; +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. +} - template> - struct base8: base> { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} - simdjson_inline base8() : base>() {} - simdjson_inline base8(const __m128i _value) : base>(_value) {} +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} - friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} - static const int SIZE = sizeof(base>::value); +} // namespace stage2 +} // unnamed namespace +} // namespace lasx +} // namespace simdjson - template - simdjson_inline simd8 prev(const simd8 prev_chunk) const { - return _mm_alignr_epi8(*this, prev_chunk, 16 - N); - } - }; +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +/* end file generic/stage2/tape_builder.h for lasx */ +/* end file generic/stage2/amalgamated.h for lasx */ - // SIMD byte mask type (returned by things like eq and gt) - template<> - struct simd8: base8 { - static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } +// +// Stage 1 +// +namespace simdjson { +namespace lasx { - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m128i _value) : base8(_value) {} - // Splat constructor - simdjson_inline simd8(bool _value) : base8(splat(_value)) {} +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} - simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } - simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } - simdjson_inline simd8 operator~() const { return *this ^ true; } - }; +namespace { - template - struct base8_numeric: base8 { - static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } - static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } - static simdjson_inline simd8 load(const T values[16]) { - return _mm_loadu_si128(reinterpret_cast(values)); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_inline simd8 repeat_16( - T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +using namespace simd; - simdjson_inline base8_numeric() : base8() {} - simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // Inspired by haswell. + // LASX use low 5 bits as index. For the 6 operators (:,[]{}), the unique-5bits is [6:2]. + // The ASCII white-space and operators have these values: (char, hex, unique-5bits) + // (' ', 20, 00000) ('\t', 09, 01001) ('\n', 0A, 01010) ('\r', 0D, 01101) + // (',', 2C, 01011) (':', 3A, 01110) ('[', 5B, 10110) ('{', 7B, 11110) (']', 5D, 10111) ('}', 7D, 11111) + const simd8 ws_table = simd8::repeat_16( + ' ', 0, 0, 0, 0, 0, 0, 0, 0, '\t', '\n', 0, 0, '\r', 0, 0 + ); + const simd8 op_table_lo = simd8::repeat_16( + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ',', 0, 0, ':', 0 + ); + const simd8 op_table_hi = simd8::repeat_16( + 0, 0, 0, 0, 0, 0, '[', ']', 0, 0, 0, 0, 0, 0, '{', '}' + ); + uint64_t ws = in.eq({ + in.chunks[0].lookup_16(ws_table), + in.chunks[1].lookup_16(ws_table), + }); + uint64_t op = in.eq({ + __lasx_xvshuf_b(op_table_hi, op_table_lo, in.chunks[0].shr<2>()), + __lasx_xvshuf_b(op_table_hi, op_table_lo, in.chunks[1].shr<2>()), + }); - // Store to array - simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + return { ws, op }; +} - // Override to distinguish from bool version - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } +simdjson_inline bool is_ascii(const simd8x64& input) { + return input.reduce_or().is_ascii(); +} - // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } - simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80 + return is_third_byte | is_fourth_byte; +} - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) - template - simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm_shuffle_epi8(lookup_table, *this); - } +} // unnamed namespace +} // namespace lasx +} // namespace simdjson - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). - // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes - // get written. - // Design consideration: it seems like a function with the - // signature simd8 compress(uint32_t mask) would be - // sensible, but the AVX ISA makes this kind of approach difficult. - template - simdjson_inline void compress(uint16_t mask, L * output) const { - using internal::thintable_epi8; - using internal::BitsSetTable256mul2; - using internal::pshufb_combine_table; - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); - // we increment by 0x08 the second half of the mask - shufmask = - _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); - // this is the version "nearly pruned" - __m128i pruned = _mm_shuffle_epi8(*this, shufmask); - // we still need to put the two halves together. - // we compute the popcount of the first half: - int pop1 = BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - __m128i compactmask = - _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); - __m128i answer = _mm_shuffle_epi8(pruned, compactmask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); - } +// +// Stage 2 +// - template - simdjson_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, replace15 - )); - } - }; +// +// Implementation-specific overrides +// +namespace simdjson { +namespace lasx { + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return lasx::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); +} - // Signed bytes - template<> - struct simd8 : base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) : simd8(_mm_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return lasx::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); +} - // Order-sensitive comparisons - simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } - simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } - simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } - }; +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return lasx::stage1::generic_validate_utf8(buf,len); +} - // Unsigned bytes - template<> - struct simd8: base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) : simd8(_mm_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} - // Saturated math - simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } - simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} - // Order-specific operations - simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept { + return lasx::stringparsing::parse_string(src, dst, allow_replacement); +} - // Bit-specific operations - simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } - simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } - simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } - simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } - simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } - template - simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } - template - simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } - // Get one of the bits and make a bitmask out of it. - // e.g. value.get_bit<7>() gets the high bit - template - simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } - }; +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return lasx::stringparsing::parse_wobbly_string(src, dst); +} - template - struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} - simd8x64(const simd8x64& o) = delete; // no copy allowed - simd8x64& operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed +} // namespace lasx +} // namespace simdjson - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} +/* including simdjson/lasx/end.h: #include */ +/* begin file simdjson/lasx/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - simdjson_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr+sizeof(simd8)*0); - this->chunks[1].store(ptr+sizeof(simd8)*1); - this->chunks[2].store(ptr+sizeof(simd8)*2); - this->chunks[3].store(ptr+sizeof(simd8)*3); - } +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "lasx" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/lasx/end.h */ - simdjson_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); - } +#endif // SIMDJSON_SRC_LASX_CPP +/* end file lasx.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_FALLBACK +/* including fallback.cpp: #include */ +/* begin file fallback.cpp */ +#ifndef SIMDJSON_SRC_FALLBACK_CPP +#define SIMDJSON_SRC_FALLBACK_CPP - simdjson_inline uint64_t compress(uint64_t mask, T * output) const { - this->chunks[0].compress(uint16_t(mask), output); - this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); - this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); - this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); - return 64 - count_ones(mask); - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - simdjson_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); - uint64_t r1 = this->chunks[1].to_bitmask() ; - uint64_t r2 = this->chunks[2].to_bitmask() ; - uint64_t r3 = this->chunks[3].to_bitmask() ; - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); - } +/* including simdjson/fallback.h: #include */ +/* begin file simdjson/fallback.h */ +#ifndef SIMDJSON_FALLBACK_H +#define SIMDJSON_FALLBACK_H - simdjson_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] == mask, - this->chunks[1] == mask, - this->chunks[2] == mask, - this->chunks[3] == mask - ).to_bitmask(); - } +/* including simdjson/fallback/begin.h: #include "simdjson/fallback/begin.h" */ +/* begin file simdjson/fallback/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "fallback" */ +#define SIMDJSON_IMPLEMENTATION fallback +/* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */ +/* begin file simdjson/fallback/base.h */ +#ifndef SIMDJSON_FALLBACK_BASE_H +#define SIMDJSON_FALLBACK_BASE_H - simdjson_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64( - this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1], - this->chunks[2] == other.chunks[2], - this->chunks[3] == other.chunks[3] - ).to_bitmask(); - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - simdjson_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] <= mask, - this->chunks[1] <= mask, - this->chunks[2] <= mask, - this->chunks[3] <= mask - ).to_bitmask(); - } - }; // struct simd8x64 +namespace simdjson { +/** + * Fallback implementation (runs on any machine). + */ +namespace fallback { -} // namespace simd -} // unnamed namespace -} // namespace westmere -} // namespace simdjson +class implementation; -#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H -/* end file simdjson/westmere/simd.h */ -/* including simdjson/westmere/stringparsing_defs.h: #include "simdjson/westmere/stringparsing_defs.h" */ -/* begin file simdjson/westmere/stringparsing_defs.h */ -#ifndef SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H -#define SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +} // namespace fallback +} // namespace simdjson -/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ -/* begin file simdjson/westmere/bitmanipulation.h */ -#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H -#define SIMDJSON_WESTMERE_BITMANIPULATION_H +#endif // SIMDJSON_FALLBACK_BASE_H +/* end file simdjson/fallback/base.h */ +/* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */ +/* begin file simdjson/fallback/bitmanipulation.h */ +#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H +#define SIMDJSON_FALLBACK_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace fallback { namespace { -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -SIMDJSON_NO_SANITIZE_UNDEFINED -// This function can be used safely even if not all bytes have been -// initialized. -// See issue https://github.com/simdjson/simdjson/issues/1965 -SIMDJSON_NO_SANITIZE_MEMORY -simdjson_inline int trailing_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) +static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { + unsigned long x0 = (unsigned long)x, top, bottom; + _BitScanForward(&top, (unsigned long)(x >> 32)); + _BitScanForward(&bottom, x0); + *ret = x0 ? bottom : 32 + top; + return x != 0; } - -/* result might be undefined when input_num is zero */ -simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return input_num & (input_num-1); +static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { + unsigned long x1 = (unsigned long)(x >> 32), top, bottom; + _BitScanReverse(&top, x1); + _BitScanReverse(&bottom, (unsigned long)x); + *ret = x1 ? top + 32 : bottom; + return x != 0; } +#endif /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO +#ifdef _MSC_VER unsigned long leading_zero = 0; // Search the mask data from most significant bit (MSB) // to least significant bit (LSB) for a set bit (1). @@ -40075,2082 +52354,2640 @@ simdjson_inline int leading_zeroes(uint64_t input_num) { return 64; #else return __builtin_clzll(input_num); -#endif// SIMDJSON_REGULAR_VISUAL_STUDIO -} - -#if SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows in this kernel - return __popcnt64(input_num);// Visual Studio wants two underscores -} -#else -simdjson_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); -} -#endif - -simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); -#else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); -#endif +#endif// _MSC_VER } } // unnamed namespace -} // namespace westmere +} // namespace fallback } // namespace simdjson -#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H -/* end file simdjson/westmere/bitmanipulation.h */ -/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ -/* begin file simdjson/westmere/simd.h */ -#ifndef SIMDJSON_WESTMERE_SIMD_H -#define SIMDJSON_WESTMERE_SIMD_H +#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H +/* end file simdjson/fallback/bitmanipulation.h */ +/* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */ +/* begin file simdjson/fallback/stringparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace fallback { namespace { -namespace simd { - template - struct base { - __m128i value; +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - // Zero constructor - simdjson_inline base() : value{__m128i()} {} + simdjson_inline bool has_quote_first() { return c == '"'; } + simdjson_inline bool has_backslash() { return c == '\\'; } + simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } + simdjson_inline int backslash_index() { return c == '\\' ? 0 : 1; } - // Conversion from SIMD register - simdjson_inline base(const __m128i _value) : value(_value) {} + uint8_t c; +}; // struct backslash_and_quote - // Conversion to SIMD register - simdjson_inline operator const __m128i&() const { return this->value; } - simdjson_inline operator __m128i&() { return this->value; } +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // store to dest unconditionally - we can overwrite the bits we don't like later + dst[0] = src[0]; + return { src[0] }; +} - // Bit operations - simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } - simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } - simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } - simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } - simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } - }; +} // unnamed namespace +} // namespace fallback +} // namespace simdjson - template> - struct base8: base> { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; +#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +/* end file simdjson/fallback/stringparsing_defs.h */ +/* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */ +/* begin file simdjson/fallback/numberparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H - simdjson_inline base8() : base>() {} - simdjson_inline base8(const __m128i _value) : base>(_value) {} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } +#include - static const int SIZE = sizeof(base>::value); +#ifdef JSON_TEST_NUMBERS // for unit testing +void found_invalid_number(const uint8_t *buf); +void found_integer(int64_t result, const uint8_t *buf); +void found_unsigned_integer(uint64_t result, const uint8_t *buf); +void found_float(double result, const uint8_t *buf); +#endif - template - simdjson_inline simd8 prev(const simd8 prev_chunk) const { - return _mm_alignr_epi8(*this, prev_chunk, 16 - N); - } - }; +namespace simdjson { +namespace fallback { +namespace numberparsing { - // SIMD byte mask type (returned by things like eq and gt) - template<> - struct simd8: base8 { - static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) { + uint64_t val; + memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m128i _value) : base8(_value) {} - // Splat constructor - simdjson_inline simd8(bool _value) : base8(splat(_value)) {} +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + return parse_eight_digits_unrolled(reinterpret_cast(chars)); +} - simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } - simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } - simdjson_inline simd8 operator~() const { return *this ^ true; } - }; +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif - template - struct base8_numeric: base8 { - static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } - static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } - static simdjson_inline simd8 load(const T values[16]) { - return _mm_loadu_si128(reinterpret_cast(values)); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_inline simd8 repeat_16( - T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace fallback +} // namespace simdjson + +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 +#else +#define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif +#endif + +#endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +/* end file simdjson/fallback/numberparsing_defs.h */ +/* end file simdjson/fallback/begin.h */ +/* including simdjson/generic/amalgamated.h for fallback: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for fallback */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for fallback: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for fallback */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { - simdjson_inline base8_numeric() : base8() {} - simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} +struct open_container; +class dom_parser_implementation; - // Store to array - simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; - // Override to distinguish from bool version - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } +} // namespace fallback +} // namespace simdjson - // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } - simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for fallback */ +/* including simdjson/generic/jsoncharutils.h for fallback: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for fallback */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) - template - simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm_shuffle_epi8(lookup_table, *this); - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). - // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes - // get written. - // Design consideration: it seems like a function with the - // signature simd8 compress(uint32_t mask) would be - // sensible, but the AVX ISA makes this kind of approach difficult. - template - simdjson_inline void compress(uint16_t mask, L * output) const { - using internal::thintable_epi8; - using internal::BitsSetTable256mul2; - using internal::pshufb_combine_table; - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); - // we increment by 0x08 the second half of the mask - shufmask = - _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); - // this is the version "nearly pruned" - __m128i pruned = _mm_shuffle_epi8(*this, shufmask); - // we still need to put the two halves together. - // we compute the popcount of the first half: - int pop1 = BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - __m128i compactmask = - _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); - __m128i answer = _mm_shuffle_epi8(pruned, compactmask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); - } +namespace simdjson { +namespace fallback { +namespace { +namespace jsoncharutils { - template - simdjson_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, replace15 - )); - } - }; +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} - // Signed bytes - template<> - struct simd8 : base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) : simd8(_mm_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} - // Order-sensitive comparisons - simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } - simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } - simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } - }; +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} - // Unsigned bytes - template<> - struct simd8: base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) : simd8(_mm_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} - // Saturated math - simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } - simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif - // Order-specific operations - simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } +} // namespace jsoncharutils +} // unnamed namespace +} // namespace fallback +} // namespace simdjson - // Bit-specific operations - simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } - simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } - simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } - simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } - simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } - template - simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } - template - simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } - // Get one of the bits and make a bitmask out of it. - // e.g. value.get_bit<7>() gets the high bit - template - simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } - }; +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for fallback */ +/* including simdjson/generic/atomparsing.h for fallback: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for fallback */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H - template - struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - simd8x64(const simd8x64& o) = delete; // no copy allowed - simd8x64& operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed +#include - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} +namespace simdjson { +namespace fallback { +namespace { +/// @private +namespace atomparsing { - simdjson_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr+sizeof(simd8)*0); - this->chunks[1].store(ptr+sizeof(simd8)*1); - this->chunks[2].store(ptr+sizeof(simd8)*2); - this->chunks[3].store(ptr+sizeof(simd8)*3); - } +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } - simdjson_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); - } - simdjson_inline uint64_t compress(uint64_t mask, T * output) const { - this->chunks[0].compress(uint16_t(mask), output); - this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); - this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); - this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); - return 64 - count_ones(mask); - } +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} - simdjson_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); - uint64_t r1 = this->chunks[1].to_bitmask() ; - uint64_t r2 = this->chunks[2].to_bitmask() ; - uint64_t r3 = this->chunks[3].to_bitmask() ; - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); - } +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} - simdjson_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] == mask, - this->chunks[1] == mask, - this->chunks[2] == mask, - this->chunks[3] == mask - ).to_bitmask(); - } +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} - simdjson_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64( - this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1], - this->chunks[2] == other.chunks[2], - this->chunks[3] == other.chunks[3] - ).to_bitmask(); - } +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} - simdjson_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] <= mask, - this->chunks[1] <= mask, - this->chunks[2] <= mask, - this->chunks[3] <= mask - ).to_bitmask(); - } - }; // struct simd8x64 +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} -} // namespace simd +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing } // unnamed namespace -} // namespace westmere +} // namespace fallback } // namespace simdjson -#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H -/* end file simdjson/westmere/simd.h */ +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for fallback */ +/* including simdjson/generic/dom_parser_implementation.h for fallback: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for fallback */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { -namespace { +namespace fallback { -using namespace simd; +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container -// Holds backslashes and quotes locations. -struct backslash_and_quote { +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { public: - static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; - simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_inline bool has_backslash() { return bs_bits != 0; } - simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; - uint32_t bs_bits; - uint32_t quote_bits; -}; // struct backslash_and_quote + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); -simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // this can read up to 31 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); - simd8 v0(src); - simd8 v1(src + 16); - v0.store(dst); - v1.store(dst + 16); - uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); - return { - uint32_t(bs_and_quote), // bs_bits - uint32_t(bs_and_quote >> 32) // quote_bits - }; -} +}; -} // unnamed namespace -} // namespace westmere +} // namespace fallback } // namespace simdjson -#endif // SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H -/* end file simdjson/westmere/stringparsing_defs.h */ -/* end file simdjson/westmere/begin.h */ -/* including generic/amalgamated.h for westmere: #include */ -/* begin file generic/amalgamated.h for westmere */ -#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) -#error generic/dependencies.h must be included before generic/amalgamated.h! -#endif +namespace simdjson { +namespace fallback { -/* including generic/base.h for westmere: #include */ -/* begin file generic/base.h for westmere */ -#ifndef SIMDJSON_SRC_GENERIC_BASE_H +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} -namespace simdjson { -namespace westmere { -namespace { +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } -struct json_character_block; + _max_depth = max_depth; + return SUCCESS; +} -} // unnamed namespace -} // namespace westmere +} // namespace fallback } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_BASE_H -/* end file generic/base.h for westmere */ -/* including generic/dom_parser_implementation.h for westmere: #include */ -/* begin file generic/dom_parser_implementation.h for westmere */ -#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for fallback */ +/* including simdjson/generic/implementation_simdjson_result_base.h for fallback: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for fallback */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// Interface a dom parser implementation must fulfill namespace simdjson { -namespace westmere { -namespace { +namespace fallback { -simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); -simdjson_inline bool is_ascii(const simd8x64& input); +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { -} // unnamed namespace -} // namespace westmere -} // namespace simdjson + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; -#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H -/* end file generic/dom_parser_implementation.h for westmere */ -/* including generic/json_character_block.h for westmere: #include */ -/* begin file generic/json_character_block.h for westmere */ -#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; -namespace simdjson { -namespace westmere { -namespace { + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; -struct json_character_block { - static simdjson_inline json_character_block classify(const simd::simd8x64& in); + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; - simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } - simdjson_inline uint64_t op() const noexcept { return _op; } - simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; - uint64_t _whitespace; - uint64_t _op; -}; + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; -} // unnamed namespace -} // namespace westmere -} // namespace simdjson +#if SIMDJSON_EXCEPTIONS -#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H -/* end file generic/json_character_block.h for westmere */ -/* end file generic/amalgamated.h for westmere */ -/* including generic/stage1/amalgamated.h for westmere: #include */ -/* begin file generic/stage1/amalgamated.h for westmere */ -// Stuff other things depend on -/* including generic/stage1/base.h for westmere: #include */ -/* begin file generic/stage1/base.h for westmere */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); -namespace simdjson { -namespace westmere { -namespace { -namespace stage1 { + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); -class bit_indexer; -template -struct buf_block_reader; -struct json_block; -class json_minifier; -class json_scanner; -struct json_string_block; -class json_string_scanner; -class json_structural_indexer; + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); -} // namespace stage1 -namespace utf8_validation { -struct utf8_checker; -} // namespace utf8_validation +#endif // SIMDJSON_EXCEPTIONS -using utf8_validation::utf8_checker; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base -} // unnamed namespace -} // namespace westmere +} // namespace fallback } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H -/* end file generic/stage1/base.h for westmere */ -/* including generic/stage1/buf_block_reader.h for westmere: #include */ -/* begin file generic/stage1/buf_block_reader.h for westmere */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for fallback */ +/* including simdjson/generic/numberparsing.h for fallback: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for fallback */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include +#include #include namespace simdjson { -namespace westmere { +namespace fallback { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + namespace { -namespace stage1 { -// Walks through a buffer in block-sized increments, loading the last part with spaces -template -struct buf_block_reader { -public: - simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdjson_inline size_t block_index(); - simdjson_inline bool has_full_block() const; - simdjson_inline const uint8_t *full_block() const; - /** - * Get the last block, padded with spaces. - * - * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this - * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there - * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. - * - * @return the number of effective characters in the last block. - */ - simdjson_inline size_t get_remainder(uint8_t *dst) const; - simdjson_inline void advance(); -private: - const uint8_t *buf; - const size_t len; - const size_t lenminusstep; - size_t idx; -}; +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text_64(const uint8_t *text) { - static char buf[sizeof(simd8x64) + 1]; - for (size_t i=0; i); i++) { - buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text(const simd8x64& in) { - static char buf[sizeof(simd8x64) + 1]; - in.store(reinterpret_cast(buf)); - for (size_t i=0; i); i++) { - if (buf[i] < ' ') { buf[i] = '_'; } + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} -simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { - static char buf[sizeof(simd8x64) + 1]; - in.store(reinterpret_cast(buf)); - for (size_t i=0; i); i++) { - if (buf[i] <= ' ') { buf[i] = '_'; } - if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximately equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} -simdjson_unused static char * format_mask(uint64_t mask) { - static char buf[sizeof(simd8x64) + 1]; - for (size_t i=0; i<64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; } - buf[64] = '\0'; - return buf; + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; } -template -simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} - -template -simdjson_inline size_t buf_block_reader::block_index() { return idx; } +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. -template -simdjson_inline bool buf_block_reader::has_full_block() const { - return idx < lenminusstep; + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } -template -simdjson_inline const uint8_t *buf_block_reader::full_block() const { - return &buf[idx]; +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } -template -simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { - if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. - std::memcpy(dst, buf + idx, len - idx); - return len - idx; +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); } -template -simdjson_inline void buf_block_reader::advance() { - idx += STEP_SIZE; +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; } -} // namespace stage1 -} // unnamed namespace -} // namespace westmere -} // namespace simdjson +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} -#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H -/* end file generic/stage1/buf_block_reader.h for westmere */ -/* including generic/stage1/json_escape_scanner.h for westmere: #include */ -/* begin file generic/stage1/json_escape_scanner.h for westmere */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} -namespace simdjson { -namespace westmere { -namespace { -namespace stage1 { +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well -/** - * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). - */ -struct json_escape_scanner { - /** The actual escape characters (the backslashes themselves). */ - uint64_t next_is_escaped = 0ULL; + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. - struct escaped_and_escape { - /** - * Mask of escaped characters. - * - * ``` - * \n \\n \\\n \\\\n \ - * 0100100010100101000 - * n \ \ n \ \ - * ``` - */ - uint64_t escaped; - /** - * Mask of escape characters. - * - * ``` - * \n \\n \\\n \\\\n \ - * 1001000101001010001 - * \ \ \ \ \ \ \ - * ``` - */ - uint64_t escape; - }; + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. - /** - * Get a mask of both escape and escaped characters (the characters following a backslash). - * - * @param potential_escape A mask of the character that can escape others (but could be - * escaped itself). e.g. block.eq('\\') - */ - simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. -#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT - if (!backslash) { return {next_escaped_without_backslashes(), 0}; } -#endif + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} - // | | Mask (shows characters instead of 1's) | Depth | Instructions | - // |--------------------------------|----------------------------------------|-------|---------------------| - // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | - // | | ` even odd even odd odd` | | | - // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) - // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) - // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) - // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) - // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () - // (*) this is not needed until the next iteration - uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); - uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); - uint64_t escape = escape_and_terminal_code & backslash; - this->next_is_escaped = escape >> 63; - return {escaped, escape}; +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} -private: - static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} - simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { - uint64_t escaped = this->next_is_escaped; - this->next_is_escaped = 0; - return escaped; +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; } + return INVALID_NUMBER(src); +} - /** - * Returns a mask of the next escape characters (masking out escaped backslashes), along with - * any non-backslash escape codes. - * - * \n \\n \\\n \\\\n returns: - * \n \ \ \n \ \ - * 11 100 1011 10100 - * - * You are expected to mask out the first bit yourself if the previous block had a trailing - * escape. - * - * & the result with potential_escape to get just the escape characters. - * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. - */ - static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { - // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: - // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be - // inverted (\\\ would be 010 instead of 101). - // - // ``` - // string: | ____\\\\_\\\\_____ | - // maybe_escaped | ODD | \ \ \ \ | - // even-aligned ^^^ ^^^^ odd-aligned - // ``` - // - // Taking that into account, our basic strategy is: +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 // - // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for - // odd-aligned runs. - // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the - // odd bits in odd-aligned runs. - // 3. & with backslash to clean up any stray bits. - // runs are set to 0, and then XORing with "odd": + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { // - // | | Mask (shows characters instead of 1's) | Instructions | - // |--------------------------------|----------------------------------------|---------------------| - // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | - // | | ` even odd even odd odd` | - // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) - // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) - // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) - // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); // - - // Escaped characters are characters following an escape. - uint64_t maybe_escaped = potential_escape << 1; - - // To distinguish odd from even escape sequences, therefore, we turn on any *starting* - // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) - // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. - // - Odd runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. - // - All other odd bytes are 1, and even bytes are 0. - uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; - uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; - - // Now we flip all odd bytes back with xor. This: - // - Makes odd runs of backslashes go from 0000 to 1010 - // - Makes even runs of backslashes go from 1111 to 1010 - // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) - // - Resets all other bytes to 0 - return even_series_codes_and_odd_bits ^ ODD_BITS; + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } } -}; - -} // namespace stage1 -} // unnamed namespace -} // namespace westmere -} // namespace simdjson - -#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H -/* end file generic/stage1/json_escape_scanner.h for westmere */ -/* including generic/stage1/json_string_scanner.h for westmere: #include */ -/* begin file generic/stage1/json_string_scanner.h for westmere */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace westmere { -namespace { -namespace stage1 { - -struct json_string_block { - // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : - _escaped(escaped), _quote(quote), _in_string(in_string) {} - - // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } - // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } - // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } - // Tail of string (everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } - - // escaped characters (backslashed--does not include the hex characters after \u) - uint64_t _escaped; - // real quotes (non-escaped ones) - uint64_t _quote; - // string characters (includes start quote but not end quote) - uint64_t _in_string; -}; - -// Scans blocks for string characters, storing the state necessary to do so -class json_string_scanner { -public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); - // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); - -private: - // Scans for escape characters - json_escape_scanner escape_scanner{}; - // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). - uint64_t prev_in_string = 0ULL; -}; - -// -// Return a mask of all string characters plus end quotes. -// -// prev_escaped is overflow saying whether the next character is escaped. -// prev_in_string is overflow saying whether we're still in a string. -// -// Backslash sequences outside of quotes will be detected in stage 2. -// -simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { - const uint64_t backslash = in.eq('\\'); - const uint64_t escaped = escape_scanner.next(backslash).escaped; - const uint64_t quote = in.eq('"') & ~escaped; - - // - // prefix_xor flips on bits inside the string (and flips off the end quote). - // - // Then we xor with prev_in_string: if we were in a string already, its effect is flipped - // (characters inside strings are outside, and characters outside strings are inside). - // - const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; - - // - // Check if we're still in a string at the end of the box so the next block will know - // - prev_in_string = uint64_t(static_cast(in_string) >> 63); - - // Use ^ to turn the beginning quote off, and the end quote on. - - // We are returning a function-local object so either we get a move constructor - // or we get copy elision. - return json_string_block(escaped, quote, in_string); -} - -simdjson_really_inline error_code json_string_scanner::finish() { - if (prev_in_string) { - return UNCLOSED_STRING; + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } } + WRITE_DOUBLE(d, src, writer); return SUCCESS; } -} // namespace stage1 -} // unnamed namespace -} // namespace westmere -} // namespace simdjson - -#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H -/* end file generic/stage1/json_string_scanner.h for westmere */ -/* including generic/stage1/utf8_lookup4_algorithm.h for westmere: #include */ -/* begin file generic/stage1/utf8_lookup4_algorithm.h for westmere */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace westmere { -namespace { -namespace utf8_validation { - -using namespace simd; +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); - simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { -// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) -// Bit 1 = Too Long (ASCII followed by continuation) -// Bit 2 = Overlong 3-byte -// Bit 4 = Surrogate -// Bit 5 = Overlong 2-byte -// Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1<<6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 - ); - constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, - CARRY, +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000 - ); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT - ); - return (byte_1_high & byte_1_low & byte_2_high); - } - simdjson_inline simd8 check_multibyte_lengths(const simd8 input, - const simd8 prev_input, const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = must_be_2_3_continuation(prev2, prev3); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; - } + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } // - // Return nonzero if there are incomplete multibyte characters at the end of the block: - // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // Handle floats if there is a . or e (or both) // - simdjson_inline simd8 is_incomplete(const simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): - // ... 1111____ 111_____ 11______ -#if SIMDJSON_IMPLEMENTATION_ICELAKE - static const uint8_t max_array[64] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 - }; -#else - static const uint8_t max_array[32] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 - }; -#endif - const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); - return input.gt_bits(max_value); + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; } - struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast path) - simd8 prev_incomplete; - - // - // Check whether the current bytes are valid UTF-8. + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. // - simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes - // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - // The only problem that can happen at EOF is that a multibyte character is too short - // or a byte value too large in the last bytes: check_special_cases only checks for bytes - // too large in the first of two bytes. - simdjson_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't - // possibly finish them. - this->error |= this->prev_incomplete; - } + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } - simdjson_inline void check_next_input(const simd8x64& input) { - if(simdjson_likely(is_ascii(input))) { - this->error |= this->prev_incomplete; - } else { - // you might think that a for-loop would work, but under Visual Studio, it is not good enough. - static_assert((simd8x64::NUM_CHUNKS == 1) - ||(simd8x64::NUM_CHUNKS == 2) - || (simd8x64::NUM_CHUNKS == 4), - "We support one, two or four chunks per 64-byte block."); - SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; - } - } - // do not forget to call check_eof! - simdjson_inline error_code errors() { - return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; - } + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} - }; // struct utf8_checker -} // namespace utf8_validation +// Inlineable functions +namespace { -} // unnamed namespace -} // namespace westmere -} // namespace simdjson +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); -#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H -/* end file generic/stage1/utf8_lookup4_algorithm.h for westmere */ -/* including generic/stage1/json_scanner.h for westmere: #include */ -/* begin file generic/stage1/json_scanner.h for westmere */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } -namespace simdjson { -namespace westmere { -namespace { -namespace stage1 { + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } -/** - * A block of scanned json, with information on operators and scalars. - * - * We seek to identify pseudo-structural characters. Anything that is inside - * a string must be omitted (hence & ~_string.string_tail()). - * Otherwise, pseudo-structural characters come in two forms. - * 1. We have the structural characters ([,],{,},:, comma). The - * term 'structural character' is from the JSON RFC. - * 2. We have the 'scalar pseudo-structural characters'. - * Scalars are quotes, and any character except structural characters and white space. - * - * To identify the scalar pseudo-structural characters, we must look at what comes - * before them: it must be a space, a quote or a structural characters. - * Starting with simdjson v0.3, we identify them by - * negation: we identify everything that is followed by a non-quote scalar, - * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. - */ -struct json_block { -public: - // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : - _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : - _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } - /** - * The start of structurals. - * In simdjson prior to v0.3, these were called the pseudo-structural characters. - **/ - simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } - /** All JSON whitespace (i.e. not in a string) */ - simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + return i; +} - // Helpers - /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } - /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } - // string and escape characters - json_string_block _string; - // whitespace, structural characters ('operators'), scalars - json_character_block _characters; - // whether the previous character was a scalar - uint64_t _follows_potential_nonquote_scalar; -private: - // Potential structurals (i.e. disregarding strings) + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - /** - * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". - * They may reside inside a string. - **/ - simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } - /** - * The start of non-operator runs, like 123, true and "abc". - * It main reside inside a string. - **/ - simdjson_inline uint64_t potential_scalar_start() const noexcept { - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space - // then we know that it is irrelevant structurally. - return _characters.scalar() & ~follows_potential_scalar(); - } - /** - * Whether the given character is immediately after a non-operator like 123, true. - * The characters following a quote are not included. - */ - simdjson_inline uint64_t follows_potential_scalar() const noexcept { - // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character - // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a - // white space. - // It is understood that within quoted region, anything at all could be marked (irrelevant). - return _follows_potential_nonquote_scalar; + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } } -}; -/** - * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. - * - * The scanner starts by calculating two distinct things: - * - string characters (taking \" into account) - * - structural characters or 'operators' ([]{},:, comma) - * and scalars (runs of non-operators like 123, true and "abc") - * - * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: - * in particular, the operator/scalar bit will find plenty of things that are actually part of - * strings. When we're done, json_block will fuse the two together by masking out tokens that are - * part of a string. - */ -class json_scanner { -public: - json_scanner() = default; - simdjson_inline json_block next(const simd::simd8x64& in); - // Returns either UNCLOSED_STRING or SUCCESS - simdjson_inline error_code finish(); + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } -private: - // Whether the last character of the previous iteration is part of a scalar token - // (anything except whitespace or a structural character/'operator'). - uint64_t prev_scalar = 0ULL; - json_string_scanner string_scanner{}; -}; + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } -// -// Check if the current character immediately follows a matching character. -// -// For example, this checks for quotes with backslashes in front of them: -// -// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); -// -simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { - const uint64_t result = match << 1 | overflow; - overflow = match >> 63; - return result; + return i; } -simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { - json_string_block strings = string_scanner.next(in); - // identifies the white-space and the structural characters - json_character_block characters = json_character_block::classify(in); - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { // - // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) - // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential - // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we - // may need to add an extra check when parsing strings. + // Check for minus sign // - // Performance: there are many ways to skin this cat. - const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); - uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); - // We are returning a function-local object so either we get a move constructor - // or we get copy elision. - return json_block( - strings,// strings is a function-local object so either it moves or the copy is elided. - characters, - follows_nonquote_scalar - ); -} + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); -simdjson_inline error_code json_scanner::finish() { - return string_scanner.finish(); + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; } -} // namespace stage1 -} // unnamed namespace -} // namespace westmere -} // namespace simdjson +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); -#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H -/* end file generic/stage1/json_scanner.h for westmere */ + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } -// All other declarations -/* including generic/stage1/find_next_document_index.h for westmere: #include */ -/* begin file generic/stage1/find_next_document_index.h for westmere */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; -namespace simdjson { -namespace westmere { -namespace { -namespace stage1 { + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } -/** - * This algorithm is used to quickly identify the last structural position that - * makes up a complete document. - * - * It does this by going backwards and finding the last *document boundary* (a - * place where one value follows another without a comma between them). If the - * last document (the characters after the boundary) has an equal number of - * start and end brackets, it is considered complete. - * - * Simply put, we iterate over the structural characters, starting from - * the end. We consider that we found the end of a JSON document when the - * first element of the pair is NOT one of these characters: '{' '[' ':' ',' - * and when the second element is NOT one of these characters: '}' ']' ':' ','. - * - * This simple comparison works most of the time, but it does not cover cases - * where the batch's structural indexes contain a perfect amount of documents. - * In such a case, we do not have access to the structural index which follows - * the last document, therefore, we do not have access to the second element in - * the pair, and that means we cannot identify the last document. To fix this - * issue, we keep a count of the open and closed curly/square braces we found - * while searching for the pair. When we find a pair AND the count of open and - * closed curly/square braces is the same, we know that we just passed a - * complete document, therefore the last json buffer location is the end of the - * batch. - */ -simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { - // Variant: do not count separately, just figure out depth - if(parser.n_structural_indexes == 0) { return 0; } - auto arr_cnt = 0; - auto obj_cnt = 0; - for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { - auto idxb = parser.structural_indexes[i]; - switch (parser.buf[idxb]) { - case ':': - case ',': - continue; - case '}': - obj_cnt--; - continue; - case ']': - arr_cnt--; - continue; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - auto idxa = parser.structural_indexes[i - 1]; - switch (parser.buf[idxa]) { - case '{': - case '[': - case ':': - case ',': - continue; - } - // Last document is complete, so the next document will appear after! - if (!arr_cnt && !obj_cnt) { - return parser.n_structural_indexes; - } - // Last document is incomplete; mark the document at i + 1 as the next one - return i; - } - // If we made it to the end, we want to finish counting to see if we have a full document. - switch (parser.buf[parser.structural_indexes[0]]) { - case '}': - obj_cnt--; - break; - case ']': - arr_cnt--; - break; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - if (!arr_cnt && !obj_cnt) { - // We have a complete document. - return parser.n_structural_indexes; - } - return 0; + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; } -} // namespace stage1 -} // unnamed namespace -} // namespace westmere -} // namespace simdjson +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); -#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H -/* end file generic/stage1/find_next_document_index.h for westmere */ -/* including generic/stage1/json_minifier.h for westmere: #include */ -/* begin file generic/stage1/json_minifier.h for westmere */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } -namespace simdjson { -namespace westmere { -namespace { -namespace stage1 { + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; -class json_minifier { -public: - template - static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } -private: - simdjson_inline json_minifier(uint8_t *_dst) - : dst{_dst} - {} - template - simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; - simdjson_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); - json_scanner scanner{}; - uint8_t *dst; -}; + exponent += exp_neg ? 0-exp : exp; + } -simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { - uint64_t mask = block.whitespace(); - dst += in.compress(mask, dst); -} + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } -simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { - error_code error = scanner.finish(); - if (error) { dst_len = 0; return error; } - dst_len = dst - dst_start; - return SUCCESS; -} + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; -template<> -simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - simd::simd8x64 in_2(block_buf+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1); - this->next(in_2, block_2); - reader.advance(); + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; } -template<> -simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - json_block block_1 = scanner.next(in_1); - this->next(block_buf, block_1); - reader.advance(); +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); } -template -error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { - buf_block_reader reader(buf, len); - json_minifier minifier(dst); - - // Index the first n-1 blocks - while (reader.has_full_block()) { - minifier.step(reader.full_block(), reader); - } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} - // Index the last (remainder) block, padded with spaces - uint8_t block[STEP_SIZE]; - size_t remaining_bytes = reader.get_remainder(block); - if (remaining_bytes > 0) { - // We do not want to write directly to the output stream. Rather, we write - // to a local buffer (for safety). - uint8_t out_block[STEP_SIZE]; - uint8_t * const guarded_dst{minifier.dst}; - minifier.dst = out_block; - minifier.step(block, reader); - size_t to_write = minifier.dst - out_block; - // In some cases, we could be enticed to consider the padded spaces - // as part of the string. This is fine as long as we do not write more - // than we consumed. - if(to_write > remaining_bytes) { to_write = remaining_bytes; } - memcpy(guarded_dst, out_block, to_write); - minifier.dst = guarded_dst + to_write; +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). + static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; } - return minifier.finish(dst, dst_len); + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; } -} // namespace stage1 -} // unnamed namespace -} // namespace westmere -} // namespace simdjson - -#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H -/* end file generic/stage1/json_minifier.h for westmere */ -/* including generic/stage1/json_structural_indexer.h for westmere: #include */ -/* begin file generic/stage1/json_structural_indexer.h for westmere */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); -namespace simdjson { -namespace westmere { -namespace { -namespace stage1 { + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } -class bit_indexer { -public: - uint32_t *tail; + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; - simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } -#if SIMDJSON_PREFER_REVERSE_BITS - /** - * ARM lacks a fast trailing zero instruction, but it has a fast - * bit reversal instruction and a fast leading zero instruction. - * Thus it may be profitable to reverse the bits (once) and then - * to rely on a sequence of instructions that call the leading - * zero instruction. - * - * Performance notes: - * The chosen routine is not optimal in terms of data dependency - * since zero_leading_bit might require two instructions. However, - * it tends to minimize the total number of instructions which is - * beneficial. - */ - simdjson_inline void write_index(uint32_t idx, uint64_t& rev_bits, int i) { - int lz = leading_zeroes(rev_bits); - this->tail[i] = static_cast(idx) + lz; - rev_bits = zero_leading_bit(rev_bits, lz); + exponent += exp_neg ? 0-exp : exp; } -#else - /** - * Under recent x64 systems, we often have both a fast trailing zero - * instruction and a fast 'clear-lower-bit' instruction so the following - * algorithm can be competitive. - */ - simdjson_inline void write_index(uint32_t idx, uint64_t& bits, int i) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - } -#endif // SIMDJSON_PREFER_REVERSE_BITS + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - template - simdjson_inline int write_indexes(uint32_t idx, uint64_t& bits) { - write_index(idx, bits, START); - SIMDJSON_IF_CONSTEXPR (N > 1) { - write_indexes<(N-1>0?START+1:START), (N-1>=0?N-1:1)>(idx, bits); - } - return START+N; - } + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - template - simdjson_inline int write_indexes_stepped(uint32_t idx, uint64_t& bits, int cnt) { - write_indexes(idx, bits); - SIMDJSON_IF_CONSTEXPR ((START+STEP) < END) { - if (simdjson_unlikely((START+STEP) < cnt)) { - write_indexes_stepped<(START+STEP(idx, bits, cnt); - } - } - return ((END-START) % STEP) == 0 ? END : (END-START) - ((END-START) % STEP) + STEP; + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} - // flatten out values in 'bits' assuming that they are are to have values of idx - // plus their position in the bitvector, and store these indexes at - // base_ptr[base] incrementing base as we go - // will potentially store extra values beyond end of valid bits, so base_ptr - // needs to be large enough to handle this +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { // - // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it - // will provide its own version of the code. -#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER - simdjson_inline void write(uint32_t idx, uint64_t bits); -#else - simdjson_inline void write(uint32_t idx, uint64_t bits) { - // In some instances, the next branch is expensive because it is mispredicted. - // Unfortunately, in other cases, - // it helps tremendously. - if (bits == 0) - return; + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; - int cnt = static_cast(count_ones(bits)); + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } -#if SIMDJSON_PREFER_REVERSE_BITS - bits = reverse_bits(bits); -#endif -#ifdef SIMDJSON_STRUCTURAL_INDEXER_STEP - static constexpr const int STEP = SIMDJSON_STRUCTURAL_INDEXER_STEP; -#else - static constexpr const int STEP = 4; -#endif - static constexpr const int STEP_UNTIL = 24; + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); - write_indexes_stepped<0, STEP_UNTIL, STEP>(idx, bits, cnt); - SIMDJSON_IF_CONSTEXPR (STEP_UNTIL < 64) { - if (simdjson_unlikely(STEP_UNTIL < cnt)) { - for (int i=STEP_UNTIL; i 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; } + } else { + overflow = p-src > 19; + } - this->tail += cnt; + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; } -#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER -}; + if (*p != '"') { return NUMBER_ERROR; } -class json_structural_indexer { -public: - /** - * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. - * - * @param partial Setting the partial parameter to true allows the find_structural_bits to - * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If - * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. - */ - template - static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; -private: - simdjson_inline json_structural_indexer(uint32_t *structural_indexes); - template - simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; - simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} - json_scanner scanner{}; - utf8_checker checker{}; - bit_indexer indexer; - uint64_t prev_structurals = 0; - uint64_t unescaped_chars_error = 0; -}; +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING -simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} +} // namespace numberparsing -// Skip the last character if it is partial -simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { - if (simdjson_unlikely(len < 3)) { - switch (len) { - case 2: - if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left - return len; - case 1: - if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - return len; - case 0: - return len; +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); } - } - if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left - return len; + return out; } +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for fallback */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for fallback: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + // -// PERF NOTES: -// We pipe 2 inputs through these stages: -// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load -// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. -// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. -// The output of step 1 depends entirely on this information. These functions don't quite use -// up enough CPU: the second half of the functions is highly serial, only using 1 execution core -// at a time. The second input's scans has some dependency on the first ones finishing it, but -// they can make a lot of progress before they need that information. -// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that -// to finish: utf-8 checks and generating the output from the last iteration. -// -// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all -// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough -// workout. +// internal::implementation_simdjson_result_base inline implementation // -template -error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { - if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } - // We guard the rest of the code so that we can assume that len > 0 throughout. - if (len == 0) { return EMPTY; } - if (is_streaming(partial)) { - len = trim_partial_utf8(buf, len); - // If you end up with an empty window after trimming - // the partial UTF-8 bytes, then chances are good that you - // have an UTF-8 formatting error. - if(len == 0) { return UTF8_ERROR; } - } - buf_block_reader reader(buf, len); - json_structural_indexer indexer(parser.structural_indexes.get()); - // Read all but the last block - while (reader.has_full_block()) { - indexer.step(reader.full_block(), reader); +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; } - // Take care of the last block (will always be there unless file is empty which is - // not supposed to happen.) - uint8_t block[STEP_SIZE]; - if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } - indexer.step(block, reader); - return indexer.finish(parser, reader.block_index(), len, partial); } -template<> -simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block); - simd::simd8x64 in_2(block+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1, reader.block_index()); - this->next(in_2, block_2, reader.block_index()+64); - reader.advance(); +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; } -template<> -simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block); - json_block block_1 = scanner.next(in_1); - this->next(in_1, block_1, reader.block_index()); - reader.advance(); +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; } -simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { - uint64_t unescaped = in.lteq(0x1F); -#if SIMDJSON_UTF8VALIDATION - checker.check_next_input(in); -#endif - indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser - prev_structurals = block.structural_start(); - unescaped_chars_error |= block.non_quote_inside_string(unescaped); +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; } -simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { - // Write out the final iteration's structurals - indexer.write(uint32_t(idx-64), prev_structurals); - error_code error = scanner.finish(); - // We deliberately break down the next expression so that it is - // human readable. - const bool should_we_exit = is_streaming(partial) ? - ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING - : (error != SUCCESS); // if partial is false, we must have SUCCESS - const bool have_unclosed_string = (error == UNCLOSED_STRING); - if (simdjson_unlikely(should_we_exit)) { return error; } +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} - if (unescaped_chars_error) { - return UNESCAPED_CHARS; - } - parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); - /*** - * The On Demand API requires special padding. - * - * This is related to https://github.com/simdjson/simdjson/issues/906 - * Basically, we want to make sure that if the parsing continues beyond the last (valid) - * structural character, it quickly stops. - * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. - * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing - * continues, then it must be [,] or }. - * Suppose it is ] or }. We backtrack to the first character, what could it be that would - * not trigger an error? It could be ] or } but no, because you can't start a document that way. - * It can't be a comma, a colon or any simple value. So the only way we could continue is - * if the repeated character is [. But if so, the document must start with [. But if the document - * starts with [, it should end with ]. If we enforce that rule, then we would get - * ][[ which is invalid. - * - * This is illustrated with the test array_iterate_unclosed_error() on the following input: - * R"({ "a": [,,)" - **/ - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final - parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); - parser.structural_indexes[parser.n_structural_indexes + 2] = 0; - parser.next_structural_index = 0; - // a valid JSON file cannot have zero structural indexes - we should have found something - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { - return EMPTY; - } - if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { - return UNEXPECTED_ERROR; - } - if (partial == stage1_mode::streaming_partial) { - // If we have an unclosed string, then the last structural - // will be the quote and we want to make sure to omit it. - if(have_unclosed_string) { - parser.n_structural_indexes--; - // a valid JSON file cannot have zero structural indexes - we should have found something - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } - } - // We truncate the input to the end of the last complete document (or zero). - auto new_structural_indexes = find_next_document_index(parser); - if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { - if(parser.structural_indexes[0] == 0) { - // If the buffer is partial and we started at index 0 but the document is - // incomplete, it's too big to parse. - return CAPACITY; - } else { - // It is possible that the document could be parsed, we just had a lot - // of white space. - parser.n_structural_indexes = 0; - return EMPTY; - } - } +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} - parser.n_structural_indexes = new_structural_indexes; - } else if (partial == stage1_mode::streaming_final) { - if(have_unclosed_string) { parser.n_structural_indexes--; } - // We truncate the input to the end of the last complete document (or zero). - // Because partial == stage1_mode::streaming_final, it means that we may - // silently ignore trailing garbage. Though it sounds bad, we do it - // deliberately because many people who have streams of JSON documents - // will truncate them for processing. E.g., imagine that you are uncompressing - // the data from a size file or receiving it in chunks from the network. You - // may not know where exactly the last document will be. Meanwhile the - // document_stream instances allow people to know the JSON documents they are - // parsing (see the iterator.source() method). - parser.n_structural_indexes = find_next_document_index(parser); - // We store the initial n_structural_indexes so that the client can see - // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, - // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, - // otherwise, it will copy some prior index. - parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; - // This next line is critical, do not change it unless you understand what you are - // doing. - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { - // We tolerate an unclosed string at the very end of the stream. Indeed, users - // often load their data in bulk without being careful and they want us to ignore - // the trailing garbage. - return EMPTY; - } - } - checker.check_eof(); - return checker.errors(); +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; } -} // namespace stage1 -} // unnamed namespace -} // namespace westmere +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace fallback } // namespace simdjson -// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. -#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for fallback */ +/* end file simdjson/generic/amalgamated.h for fallback */ +/* including simdjson/fallback/end.h: #include "simdjson/fallback/end.h" */ +/* begin file simdjson/fallback/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H -/* end file generic/stage1/json_structural_indexer.h for westmere */ -/* including generic/stage1/utf8_validator.h for westmere: #include */ -/* begin file generic/stage1/utf8_validator.h for westmere */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +/* undefining SIMDJSON_IMPLEMENTATION from "fallback" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/fallback/end.h */ + +#endif // SIMDJSON_FALLBACK_H +/* end file simdjson/fallback.h */ +/* including simdjson/fallback/implementation.h: #include */ +/* begin file simdjson/fallback/implementation.h */ +#ifndef SIMDJSON_FALLBACK_IMPLEMENTATION_H +#define SIMDJSON_FALLBACK_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { -namespace { -namespace stage1 { +namespace fallback { /** - * Validates that the string is actual UTF-8. + * @private */ -template -bool generic_validate_utf8(const uint8_t * input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - return c.errors() == error_code::SUCCESS; +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "fallback", + "Generic fallback implementation", + 0 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H +/* end file simdjson/fallback/implementation.h */ + +/* including simdjson/fallback/begin.h: #include */ +/* begin file simdjson/fallback/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "fallback" */ +#define SIMDJSON_IMPLEMENTATION fallback +/* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */ +/* begin file simdjson/fallback/base.h */ +#ifndef SIMDJSON_FALLBACK_BASE_H +#define SIMDJSON_FALLBACK_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Fallback implementation (runs on any machine). + */ +namespace fallback { + +class implementation; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BASE_H +/* end file simdjson/fallback/base.h */ +/* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */ +/* begin file simdjson/fallback/bitmanipulation.h */ +#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H +#define SIMDJSON_FALLBACK_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { + +#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) +static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { + unsigned long x0 = (unsigned long)x, top, bottom; + _BitScanForward(&top, (unsigned long)(x >> 32)); + _BitScanForward(&bottom, x0); + *ret = x0 ? bottom : 32 + top; + return x != 0; +} +static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { + unsigned long x1 = (unsigned long)(x >> 32), top, bottom; + _BitScanReverse(&top, x1); + _BitScanReverse(&bottom, (unsigned long)x); + *ret = x1 ? top + 32 : bottom; + return x != 0; } +#endif -bool generic_validate_utf8(const char * input, size_t length) { - return generic_validate_utf8(reinterpret_cast(input),length); +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef _MSC_VER + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// _MSC_VER } -} // namespace stage1 } // unnamed namespace -} // namespace westmere +} // namespace fallback } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H -/* end file generic/stage1/utf8_validator.h for westmere */ -/* end file generic/stage1/amalgamated.h for westmere */ -/* including generic/stage2/amalgamated.h for westmere: #include */ -/* begin file generic/stage2/amalgamated.h for westmere */ -// Stuff other things depend on -/* including generic/stage2/base.h for westmere: #include */ -/* begin file generic/stage2/base.h for westmere */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H +/* end file simdjson/fallback/bitmanipulation.h */ +/* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */ +/* begin file simdjson/fallback/stringparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace fallback { namespace { -namespace stage2 { -class json_iterator; -class structural_iterator; -struct tape_builder; -struct tape_writer; +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return c == '"'; } + simdjson_inline bool has_backslash() { return c == '\\'; } + simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } + simdjson_inline int backslash_index() { return c == '\\' ? 0 : 1; } + + uint8_t c; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // store to dest unconditionally - we can overwrite the bits we don't like later + dst[0] = src[0]; + return { src[0] }; +} -} // namespace stage2 } // unnamed namespace -} // namespace westmere +} // namespace fallback } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H -/* end file generic/stage2/base.h for westmere */ -/* including generic/stage2/tape_writer.h for westmere: #include */ -/* begin file generic/stage2/tape_writer.h for westmere */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +/* end file simdjson/fallback/stringparsing_defs.h */ +/* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */ +/* begin file simdjson/fallback/numberparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include +#ifdef JSON_TEST_NUMBERS // for unit testing +void found_invalid_number(const uint8_t *buf); +void found_integer(int64_t result, const uint8_t *buf); +void found_unsigned_integer(uint64_t result, const uint8_t *buf); +void found_float(double result, const uint8_t *buf); +#endif + namespace simdjson { -namespace westmere { +namespace fallback { +namespace numberparsing { + +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) { + uint64_t val; + memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + return parse_eight_digits_unrolled(reinterpret_cast(chars)); +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace fallback +} // namespace simdjson + +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 +#else +#define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif +#endif + +#endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +/* end file simdjson/fallback/numberparsing_defs.h */ +/* end file simdjson/fallback/begin.h */ +/* including generic/stage1/find_next_document_index.h for fallback: #include */ +/* begin file generic/stage1/find_next_document_index.h for fallback */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { namespace { -namespace stage2 { +namespace stage1 { -struct tape_writer { - /** The next place to write to tape */ - uint64_t *next_tape_loc; +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} - /** Write a signed 64-bit value to tape. */ - simdjson_inline void append_s64(int64_t value) noexcept; +} // namespace stage1 +} // unnamed namespace +} // namespace fallback +} // namespace simdjson - /** Write an unsigned 64-bit value to tape. */ - simdjson_inline void append_u64(uint64_t value) noexcept; +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for fallback */ +/* including generic/stage2/stringparsing.h for fallback: #include */ +/* begin file generic/stage2/stringparsing.h for fallback */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H - /** Write a double value to tape. */ - simdjson_inline void append_double(double value) noexcept; +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - /** - * Append a tape entry (an 8-bit type,and 56 bits worth of value). - */ - simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times - /** - * Skip the current tape entry without writing. - * - * Used to skip the start of the container, since we'll come back later to fill it in when the - * container ends. - */ - simdjson_inline void skip() noexcept; +namespace simdjson { +namespace fallback { +namespace { +/// @private +namespace stringparsing { - /** - * Skip the number of tape entries necessary to write a large u64 or i64. - */ - simdjson_inline void skip_large_integer() noexcept; +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /** - * Skip the number of tape entries necessary to write a double. - */ - simdjson_inline void skip_double() noexcept; + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. - /** - * Write a value to a known location on tape. - * - * Used to go back and write out the start of a container after the container ends. - */ - simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -private: - /** - * Append both the tape entry, and a supplementary value following it. Used for types that need - * all 64 bits, such as double and uint64_t. - */ - template - simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; -}; // struct tape_writer + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; -simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { - append2(0, value, internal::tape_type::INT64); -} + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); -simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { - append(0, internal::tape_type::UINT64); - *next_tape_loc = value; - next_tape_loc++; -} + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } -/** Write a double value to tape. */ -simdjson_inline void tape_writer::append_double(double value) noexcept { - append2(0, value, internal::tape_type::DOUBLE); + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; } -simdjson_inline void tape_writer::skip() noexcept { - next_tape_loc++; -} -simdjson_inline void tape_writer::skip_large_integer() noexcept { - next_tape_loc += 2; -} +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion is not valid; we defer the check for this to inside the + // multilingual plane check. + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } -simdjson_inline void tape_writer::skip_double() noexcept { - next_tape_loc += 2; + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; } -simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { - *next_tape_loc = val | ((uint64_t(char(t))) << 56); - next_tape_loc++; -} -template -simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { - append(val, t); - static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); - memcpy(next_tape_loc, &val2, sizeof(val2)); - next_tape_loc++; +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_ptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } } -simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { - tape_loc = val | ((uint64_t(char(t))) << 56); +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } } -} // namespace stage2 +} // namespace stringparsing } // unnamed namespace -} // namespace westmere +} // namespace fallback } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H -/* end file generic/stage2/tape_writer.h for westmere */ -/* including generic/stage2/logger.h for westmere: #include */ -/* begin file generic/stage2/logger.h for westmere */ +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for fallback */ +/* including generic/stage2/logger.h for fallback: #include */ +/* begin file generic/stage2/logger.h for fallback */ #ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -42164,7 +55001,7 @@ simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, intern // This is for an internal-only stage 2 specific logger. // Set LOG_ENABLED = true to log what stage 2 is doing! namespace simdjson { -namespace westmere { +namespace fallback { namespace { namespace logger { @@ -42247,15 +55084,13 @@ namespace logger { } // namespace logger } // unnamed namespace -} // namespace westmere +} // namespace fallback } // namespace simdjson #endif // SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H -/* end file generic/stage2/logger.h for westmere */ - -// All other declarations -/* including generic/stage2/json_iterator.h for westmere: #include */ -/* begin file generic/stage2/json_iterator.h for westmere */ +/* end file generic/stage2/logger.h for fallback */ +/* including generic/stage2/json_iterator.h for fallback: #include */ +/* begin file generic/stage2/json_iterator.h for fallback */ #ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -42266,7 +55101,7 @@ namespace logger { /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace fallback { namespace { namespace stage2 { @@ -42580,327 +55415,133 @@ simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V } // namespace stage2 } // unnamed namespace -} // namespace westmere +} // namespace fallback } // namespace simdjson #endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H -/* end file generic/stage2/json_iterator.h for westmere */ -/* including generic/stage2/stringparsing.h for westmere: #include */ -/* begin file generic/stage2/stringparsing.h for westmere */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/json_iterator.h for fallback */ +/* including generic/stage2/tape_writer.h for fallback: #include */ +/* begin file generic/stage2/tape_writer.h for fallback */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ /* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// This file contains the common code every implementation uses -// It is intended to be included multiple times and compiled multiple times +#include namespace simdjson { -namespace westmere { +namespace fallback { namespace { -/// @private -namespace stringparsing { +namespace stage2 { -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr, bool allow_replacement) { - // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) - constexpr uint32_t substitution_code_point = 0xfffd; - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; - // If we found a high surrogate, we must - // check for low surrogate for characters - // outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - const uint8_t *src_data = *src_ptr; - /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ - if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { - if(!allow_replacement) { return false; } - code_point = substitution_code_point; - } else { - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; - // We have already checked that the high surrogate is valid and - // (code_point - 0xd800) < 1024. - // - // Check that code_point_2 is in the range 0xdc00..0xdfff - // and that code_point_2 was parsed from valid hex. - uint32_t low_bit = code_point_2 - 0xdc00; - if (low_bit >> 10) { - if(!allow_replacement) { return false; } - code_point = substitution_code_point; - } else { - code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; - *src_ptr += 6; - } + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; - } - } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { - // If we encounter a low surrogate (not preceded by a high surrogate) - // then we have an error. - if(!allow_replacement) { return false; } - code_point = substitution_code_point; - } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; -// handle a unicode codepoint using the wobbly convention -// https://simonsapin.github.io/wtf-8/ -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // It is not ideal that this function is nearly identical to handle_unicode_codepoint. - // - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // If we found a high surrogate, we must - // check for low surrogate for characters - // outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - const uint8_t *src_data = *src_ptr; - /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ - if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); - uint32_t low_bit = code_point_2 - 0xdc00; - if ((low_bit >> 10) == 0) { - code_point = - (((code_point - 0xd800) << 10) | low_bit) + 0x10000; - *src_ptr += 6; - } - } - } +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); } - -/** - * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There - * must be an unescaped quote terminating the string. It returns the final output - * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large - * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + - * SIMDJSON_PADDING bytes. - */ -simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); - } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. */ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; - } - } +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; } -simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { - // It is not ideal that this function is nearly identical to parse_string. - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); - } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint_wobbly(&src, &dst)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. */ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; - } - } +/** Write a double value to tape. */ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); } -} // namespace stringparsing -} // unnamed namespace -} // namespace westmere -} // namespace simdjson - -#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H -/* end file generic/stage2/stringparsing.h for westmere */ -/* including generic/stage2/structural_iterator.h for westmere: #include */ -/* begin file generic/stage2/structural_iterator.h for westmere */ -#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #include */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} -namespace simdjson { -namespace westmere { -namespace { -namespace stage2 { +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} -class structural_iterator { -public: - const uint8_t* const buf; - uint32_t *next_structural; - dom_parser_implementation &dom_parser; +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} - // Start a structural - simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) - : buf{_dom_parser.buf}, - next_structural{&_dom_parser.structural_indexes[start_structural_index]}, - dom_parser{_dom_parser} { - } - // Get the buffer position of the current structural character - simdjson_inline const uint8_t* current() { - return &buf[*(next_structural-1)]; - } - // Get the current structural character - simdjson_inline char current_char() { - return buf[*(next_structural-1)]; - } - // Get the next structural character without advancing - simdjson_inline char peek_next_char() { - return buf[*next_structural]; - } - simdjson_inline const uint8_t* peek() { - return &buf[*next_structural]; - } - simdjson_inline const uint8_t* advance() { - return &buf[*(next_structural++)]; - } - simdjson_inline char advance_char() { - return buf[*(next_structural++)]; - } - simdjson_inline size_t remaining_len() { - return dom_parser.len - *(next_structural-1); - } +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} - simdjson_inline bool at_end() { - return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; - } - simdjson_inline bool at_beginning() { - return next_structural == dom_parser.structural_indexes.get(); - } -}; +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} } // namespace stage2 } // unnamed namespace -} // namespace westmere +} // namespace fallback } // namespace simdjson -#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H -/* end file generic/stage2/structural_iterator.h for westmere */ -/* including generic/stage2/tape_builder.h for westmere: #include */ -/* begin file generic/stage2/tape_builder.h for westmere */ +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for fallback */ +/* including generic/stage2/tape_builder.h for fallback: #include */ +/* begin file generic/stage2/tape_builder.h for fallback */ #ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -42917,7 +55558,7 @@ class structural_iterator { namespace simdjson { -namespace westmere { +namespace fallback { namespace { namespace stage2 { @@ -43194,26 +55835,25 @@ simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { } // namespace stage2 } // unnamed namespace -} // namespace westmere +} // namespace fallback } // namespace simdjson #endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H -/* end file generic/stage2/tape_builder.h for westmere */ -/* end file generic/stage2/amalgamated.h for westmere */ +/* end file generic/stage2/tape_builder.h for fallback */ // // Stage 1 // namespace simdjson { -namespace westmere { +namespace fallback { simdjson_warn_unused error_code implementation::create_dom_parser_implementation( size_t capacity, size_t max_depth, std::unique_ptr& dst ) const noexcept { - dst.reset( new (std::nothrow) dom_parser_implementation() ); + dst.reset( new (std::nothrow) fallback::dom_parser_implementation() ); if (!dst) { return MEMALLOC; } if (auto err = dst->set_capacity(capacity)) return err; @@ -43223,115 +55863,348 @@ simdjson_warn_unused error_code implementation::create_dom_parser_implementation } namespace { +namespace stage1 { -using namespace simd; +class structural_scanner { +public: -simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { - // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why - // we can't use the generic lookup_16. - auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); +simdjson_inline structural_scanner(dom_parser_implementation &_parser, stage1_mode _partial) + : buf{_parser.buf}, + next_structural_index{_parser.structural_indexes.get()}, + parser{_parser}, + len{static_cast(_parser.len)}, + partial{_partial} { +} - // The 6 operators (:,[]{}) have these values: - // - // , 2C - // : 3A - // [ 5B - // { 7B - // ] 5D - // } 7D - // - // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. - // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then - // match it (against | 0x20). - // - // To prevent recognizing other characters, everything else gets compared with 0, which cannot - // match due to the | 0x20. - // - // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , - // and :. This gets caught in stage 2, which checks the actual character to ensure the right - // operators are in the right places. - const auto op_table = simd8::repeat_16( - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B - ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D - ); +simdjson_inline void add_structural() { + *next_structural_index = idx; + next_structural_index++; +} - // We compute whitespace and op separately. If the code later only use one or the - // other, given the fact that all functions are aggressively inlined, we can - // hope that useless computations will be omitted. This is namely case when - // minifying (we only need whitespace). +simdjson_inline bool is_continuation(uint8_t c) { + return (c & 0xc0) == 0x80; +} +simdjson_inline void validate_utf8_character() { + // Continuation + if (simdjson_unlikely((buf[idx] & 0x40) == 0)) { + // extra continuation + error = UTF8_ERROR; + idx++; + return; + } - const uint64_t whitespace = in.eq({ - _mm_shuffle_epi8(whitespace_table, in.chunks[0]), - _mm_shuffle_epi8(whitespace_table, in.chunks[1]), - _mm_shuffle_epi8(whitespace_table, in.chunks[2]), - _mm_shuffle_epi8(whitespace_table, in.chunks[3]) - }); - // Turn [ and ] into { and } - const simd8x64 curlified{ - in.chunks[0] | 0x20, - in.chunks[1] | 0x20, - in.chunks[2] | 0x20, - in.chunks[3] | 0x20 - }; - const uint64_t op = curlified.eq({ - _mm_shuffle_epi8(op_table, in.chunks[0]), - _mm_shuffle_epi8(op_table, in.chunks[1]), - _mm_shuffle_epi8(op_table, in.chunks[2]), - _mm_shuffle_epi8(op_table, in.chunks[3]) - }); - return { whitespace, op }; -} + // 2-byte + if ((buf[idx] & 0x20) == 0) { + // missing continuation + if (simdjson_unlikely(idx+1 > len || !is_continuation(buf[idx+1]))) { + if (idx+1 > len && is_streaming(partial)) { idx = len; return; } + error = UTF8_ERROR; + idx++; + return; + } + // overlong: 1100000_ 10______ + if (buf[idx] <= 0xc1) { error = UTF8_ERROR; } + idx += 2; + return; + } -simdjson_inline bool is_ascii(const simd8x64& input) { - return input.reduce_or().is_ascii(); -} + // 3-byte + if ((buf[idx] & 0x10) == 0) { + // missing continuation + if (simdjson_unlikely(idx+2 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]))) { + if (idx+2 > len && is_streaming(partial)) { idx = len; return; } + error = UTF8_ERROR; + idx++; + return; + } + // overlong: 11100000 100_____ ________ + if (buf[idx] == 0xe0 && buf[idx+1] <= 0x9f) { error = UTF8_ERROR; } + // surrogates: U+D800-U+DFFF 11101101 101_____ + if (buf[idx] == 0xed && buf[idx+1] >= 0xa0) { error = UTF8_ERROR; } + idx += 3; + return; + } -simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 - simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. - return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); + // 4-byte + // missing continuation + if (simdjson_unlikely(idx+3 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]) || !is_continuation(buf[idx+3]))) { + if (idx+2 > len && is_streaming(partial)) { idx = len; return; } + error = UTF8_ERROR; + idx++; + return; + } + // overlong: 11110000 1000____ ________ ________ + if (buf[idx] == 0xf0 && buf[idx+1] <= 0x8f) { error = UTF8_ERROR; } + // too large: > U+10FFFF: + // 11110100 (1001|101_)____ + // 1111(1___|011_|0101) 10______ + // also includes 5, 6, 7 and 8 byte characters: + // 11111___ + if (buf[idx] == 0xf4 && buf[idx+1] >= 0x90) { error = UTF8_ERROR; } + if (buf[idx] >= 0xf5) { error = UTF8_ERROR; } + idx += 4; } -simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80 - simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80 - return is_third_byte | is_fourth_byte; +// Returns true if the string is unclosed. +simdjson_inline bool validate_string() { + idx++; // skip first quote + while (idx < len && buf[idx] != '"') { + if (buf[idx] == '\\') { + idx += 2; + } else if (simdjson_unlikely(buf[idx] & 0x80)) { + validate_utf8_character(); + } else { + if (buf[idx] < 0x20) { error = UNESCAPED_CHARS; } + idx++; + } + } + if (idx >= len) { return true; } + return false; } -} // unnamed namespace -} // namespace westmere -} // namespace simdjson - -// -// Stage 2 -// +simdjson_inline bool is_whitespace_or_operator(uint8_t c) { + switch (c) { + case '{': case '}': case '[': case ']': case ',': case ':': + case ' ': case '\r': case '\n': case '\t': + return true; + default: + return false; + } +} // -// Implementation-specific overrides +// Parse the entire input in STEP_SIZE-byte chunks. // +simdjson_inline error_code scan() { + bool unclosed_string = false; + for (;idx 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + parser.n_structural_indexes = new_structural_indexes; + } else if(partial == stage1_mode::streaming_final) { + if(unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (parser.n_structural_indexes == 0) { return EMPTY; } + } else if(unclosed_string) { error = UNCLOSED_STRING; } + return error; +} -namespace simdjson { -namespace westmere { +private: + const uint8_t *buf; + uint32_t *next_structural_index; + dom_parser_implementation &parser; + uint32_t len; + uint32_t idx{0}; + error_code error{SUCCESS}; + stage1_mode partial; +}; // structural_scanner -simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { - return westmere::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); -} +} // namespace stage1 +} // unnamed namespace -simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode partial) noexcept { this->buf = _buf; this->len = _len; - return westmere::stage1::json_structural_indexer::index<64>(_buf, _len, *this, streaming); + stage1::structural_scanner scanner(*this, partial); + return scanner.scan(); +} + +// big table for the minifier +static uint8_t jump_table[256 * 3] = { + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, + 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, +}; + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + size_t i = 0, pos = 0; + uint8_t quote = 0; + uint8_t nonescape = 1; + + while (i < len) { + unsigned char c = buf[i]; + uint8_t *meta = jump_table + 3 * c; + + quote = quote ^ (meta[0] & nonescape); + dst[pos] = c; + pos += meta[2] | quote; + + i += 1; + nonescape = uint8_t(~nonescape) | (meta[1]); + } + dst_len = pos; // we intentionally do not work with a reference + // for fear of aliasing + return quote ? UNCLOSED_STRING : SUCCESS; } +// credit: based on code from Google Fuchsia (Apache Licensed) simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return westmere::stage1::generic_validate_utf8(buf,len); + const uint8_t *data = reinterpret_cast(buf); + uint64_t pos = 0; + uint32_t code_point = 0; + while (pos < len) { + // check of the next 8 bytes are ascii. + uint64_t next_pos = pos + 16; + if (next_pos <= len) { // if it is safe to read 8 more bytes, check that they are ascii + uint64_t v1; + memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + pos = next_pos; + continue; + } + } + unsigned char byte = data[pos]; + if (byte < 0x80) { + pos++; + continue; + } else if ((byte & 0xe0) == 0xc0) { + next_pos = pos + 2; + if (next_pos > len) { return false; } + if ((data[pos + 1] & 0xc0) != 0x80) { return false; } + // range check + code_point = (byte & 0x1f) << 6 | (data[pos + 1] & 0x3f); + if (code_point < 0x80 || 0x7ff < code_point) { return false; } + } else if ((byte & 0xf0) == 0xe0) { + next_pos = pos + 3; + if (next_pos > len) { return false; } + if ((data[pos + 1] & 0xc0) != 0x80) { return false; } + if ((data[pos + 2] & 0xc0) != 0x80) { return false; } + // range check + code_point = (byte & 0x0f) << 12 | + (data[pos + 1] & 0x3f) << 6 | + (data[pos + 2] & 0x3f); + if (code_point < 0x800 || 0xffff < code_point || + (0xd7ff < code_point && code_point < 0xe000)) { + return false; + } + } else if ((byte & 0xf8) == 0xf0) { // 0b11110000 + next_pos = pos + 4; + if (next_pos > len) { return false; } + if ((data[pos + 1] & 0xc0) != 0x80) { return false; } + if ((data[pos + 2] & 0xc0) != 0x80) { return false; } + if ((data[pos + 3] & 0xc0) != 0x80) { return false; } + // range check + code_point = + (byte & 0x07) << 18 | (data[pos + 1] & 0x3f) << 12 | + (data[pos + 2] & 0x3f) << 6 | (data[pos + 3] & 0x3f); + if (code_point <= 0xffff || 0x10ffff < code_point) { return false; } + } else { + // we may have a continuation + return false; + } + pos = next_pos; + } + return true; } +} // namespace fallback +} // namespace simdjson + +// +// Stage 2 +// + +namespace simdjson { +namespace fallback { + simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { return stage2::tape_builder::parse_document(*this, _doc); } @@ -43340,12 +56213,13 @@ simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::docu return stage2::tape_builder::parse_document(*this, _doc); } +SIMDJSON_NO_SANITIZE_MEMORY simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { - return westmere::stringparsing::parse_string(src, dst, replacement_char); + return fallback::stringparsing::parse_string(src, dst, replacement_char); } simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { - return westmere::stringparsing::parse_wobbly_string(src, dst); + return fallback::stringparsing::parse_wobbly_string(src, dst); } simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { @@ -43354,27 +56228,22 @@ simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t * return stage2(_doc); } -} // namespace westmere +} // namespace fallback } // namespace simdjson -/* including simdjson/westmere/end.h: #include */ -/* begin file simdjson/westmere/end.h */ +/* including simdjson/fallback/end.h: #include */ +/* begin file simdjson/fallback/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE -SIMDJSON_UNTARGET_REGION -#endif - -/* undefining SIMDJSON_IMPLEMENTATION from "westmere" */ +/* undefining SIMDJSON_IMPLEMENTATION from "fallback" */ #undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/westmere/end.h */ +/* end file simdjson/fallback/end.h */ -#endif // SIMDJSON_SRC_WESTMERE_CPP -/* end file westmere.cpp */ +#endif // SIMDJSON_SRC_FALLBACK_CPP +/* end file fallback.cpp */ #endif - /* undefining SIMDJSON_CONDITIONAL_INCLUDE */ #undef SIMDJSON_CONDITIONAL_INCLUDE diff --git a/simdjson/simdjson.h b/simdjson/simdjson.h index a56d5d7..a0d4499 100644 --- a/simdjson/simdjson.h +++ b/simdjson/simdjson.h @@ -1,4 +1,4 @@ -/* auto-generated on 2024-01-28 12:42:59 -0500. Do not edit! */ +/* auto-generated on 2025-03-27 15:01:10 -0400. Do not edit! */ /* including simdjson.h: */ /* begin file simdjson.h */ #ifndef SIMDJSON_H @@ -60,6 +60,16 @@ #endif #endif +// C++ 23 +#if !defined(SIMDJSON_CPLUSPLUS23) && (SIMDJSON_CPLUSPLUS >= 202302L) +#define SIMDJSON_CPLUSPLUS23 1 +#endif + +// C++ 20 +#if !defined(SIMDJSON_CPLUSPLUS20) && (SIMDJSON_CPLUSPLUS >= 202002L) +#define SIMDJSON_CPLUSPLUS20 1 +#endif + // C++ 17 #if !defined(SIMDJSON_CPLUSPLUS17) && (SIMDJSON_CPLUSPLUS >= 201703L) #define SIMDJSON_CPLUSPLUS17 1 @@ -87,6 +97,30 @@ #endif #endif +#ifdef __has_include +#if __has_include() +#include +#endif +#endif + +#if defined(__apple_build_version__) +#if __apple_build_version__ < 14000000 +#define SIMDJSON_CONCEPT_DISABLED 1 // apple-clang/13 doesn't support std::convertible_to +#endif +#endif + + +#if defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) +#if __cpp_concepts >= 201907L +#include +#define SIMDJSON_SUPPORTS_DESERIALIZATION 1 +#else +#define SIMDJSON_SUPPORTS_DESERIALIZATION 0 +#endif +#else // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) +#define SIMDJSON_SUPPORTS_DESERIALIZATION 0 +#endif // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) + #endif // SIMDJSON_COMPILER_CHECK_H /* end file simdjson/compiler_check.h */ /* including simdjson/portability.h: #include "simdjson/portability.h" */ @@ -99,11 +133,18 @@ #include #include #include +#include #ifndef _WIN32 // strcasecmp, strncasecmp #include #endif +static_assert(CHAR_BIT == 8, "simdjson requires 8-bit bytes"); + + +// We are using size_t without namespace std:: throughout the project +using std::size_t; + #ifdef _MSC_VER #define SIMDJSON_VISUAL_STUDIO 1 /** @@ -125,13 +166,16 @@ #endif // __clang__ #endif // _MSC_VER -#if defined(__x86_64__) || defined(_M_AMD64) +#if (defined(__x86_64__) || defined(_M_AMD64)) && !defined(_M_ARM64EC) #define SIMDJSON_IS_X86_64 1 -#elif defined(__aarch64__) || defined(_M_ARM64) +#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) #define SIMDJSON_IS_ARM64 1 #elif defined(__riscv) && __riscv_xlen == 64 #define SIMDJSON_IS_RISCV64 1 +#elif defined(__loongarch_lp64) +#define SIMDJSON_IS_LOONGARCH64 1 #elif defined(__PPC64__) || defined(_M_PPC64) +#define SIMDJSON_IS_PPC64 1 #if defined(__ALTIVEC__) #define SIMDJSON_IS_PPC64_VMX 1 #endif // defined(__ALTIVEC__) @@ -239,6 +283,11 @@ #define SIMDJSON_NO_SANITIZE_UNDEFINED #endif +#if defined(__clang__) || defined(__GNUC__) +#define simdjson_pure [[gnu::pure]] +#else +#define simdjson_pure +#endif #if defined(__clang__) || defined(__GNUC__) #if defined(__has_feature) @@ -284,6 +333,45 @@ #endif + + +#if defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ +#define SIMDJSON_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#elif defined _WIN32 +#define SIMDJSON_IS_BIG_ENDIAN 0 +#else +#if defined(__APPLE__) || defined(__FreeBSD__) +#include +#elif defined(sun) || defined(__sun) +#include +#elif defined(__MVS__) +#include +#else +#ifdef __has_include +#if __has_include() +#include +#endif //__has_include() +#endif //__has_include +#endif +# +#ifndef __BYTE_ORDER__ +// safe choice +#define SIMDJSON_IS_BIG_ENDIAN 0 +#endif +# +#ifndef __ORDER_LITTLE_ENDIAN__ +// safe choice +#define SIMDJSON_IS_BIG_ENDIAN 0 +#endif +# +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define SIMDJSON_IS_BIG_ENDIAN 0 +#else +#define SIMDJSON_IS_BIG_ENDIAN 1 +#endif +#endif + + #endif // SIMDJSON_PORTABILITY_H /* end file simdjson/portability.h */ @@ -332,6 +420,8 @@ double from_chars(const char *first, const char* end) noexcept; #define SIMDJSON_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0) #if SIMDJSON_REGULAR_VISUAL_STUDIO + // We could use [[deprecated]] but it requires C++14 + #define simdjson_deprecated __declspec(deprecated) #define simdjson_really_inline __forceinline #define simdjson_never_inline __declspec(noinline) @@ -370,6 +460,8 @@ double from_chars(const char *first, const char* end) noexcept; #define SIMDJSON_POP_DISABLE_UNUSED_WARNINGS #else // SIMDJSON_REGULAR_VISUAL_STUDIO + // We could use [[deprecated]] but it requires C++14 + #define simdjson_deprecated __attribute__((deprecated)) #define simdjson_really_inline inline __attribute__((always_inline)) #define simdjson_never_inline inline __attribute__((noinline)) @@ -545,13 +637,12 @@ SIMDJSON_PUSH_DISABLE_ALL_WARNINGS // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -// #pragma once // We remove #pragma once here as it generates a warning in some cases. We rely on the include guard. #ifndef NONSTD_SV_LITE_H_INCLUDED #define NONSTD_SV_LITE_H_INCLUDED #define string_view_lite_MAJOR 1 -#define string_view_lite_MINOR 7 +#define string_view_lite_MINOR 8 #define string_view_lite_PATCH 0 #define string_view_lite_VERSION nssv_STRINGIFY(string_view_lite_MAJOR) "." nssv_STRINGIFY(string_view_lite_MINOR) "." nssv_STRINGIFY(string_view_lite_PATCH) @@ -673,6 +764,8 @@ SIMDJSON_PUSH_DISABLE_ALL_WARNINGS #if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS +#include + namespace nonstd { template< class CharT, class Traits, class Allocator = std::allocator > @@ -703,22 +796,22 @@ inline namespace literals { inline namespace string_view_literals { -constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1) +constexpr std::string_view operator ""_sv( const char* str, size_t len ) noexcept // (1) { return std::string_view{ str, len }; } -constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2) +constexpr std::u16string_view operator ""_sv( const char16_t* str, size_t len ) noexcept // (2) { return std::u16string_view{ str, len }; } -constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3) +constexpr std::u32string_view operator ""_sv( const char32_t* str, size_t len ) noexcept // (3) { return std::u32string_view{ str, len }; } -constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4) +constexpr std::wstring_view operator ""_sv( const wchar_t* str, size_t len ) noexcept // (4) { return std::wstring_view{ str, len }; } @@ -2049,22 +2142,22 @@ nssv_inline_ns namespace string_view_literals { #if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS -nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1) +nssv_constexpr nonstd::sv_lite::string_view operator ""sv( const char* str, size_t len ) nssv_noexcept // (1) { return nonstd::sv_lite::string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +nssv_constexpr nonstd::sv_lite::u16string_view operator ""sv( const char16_t* str, size_t len ) nssv_noexcept // (2) { return nonstd::sv_lite::u16string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +nssv_constexpr nonstd::sv_lite::u32string_view operator ""sv( const char32_t* str, size_t len ) nssv_noexcept // (3) { return nonstd::sv_lite::u32string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +nssv_constexpr nonstd::sv_lite::wstring_view operator ""sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) { return nonstd::sv_lite::wstring_view{ str, len }; } @@ -2073,22 +2166,22 @@ nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, #if nssv_CONFIG_USR_SV_OPERATOR -nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1) +nssv_constexpr nonstd::sv_lite::string_view operator ""_sv( const char* str, size_t len ) nssv_noexcept // (1) { return nonstd::sv_lite::string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +nssv_constexpr nonstd::sv_lite::u16string_view operator ""_sv( const char16_t* str, size_t len ) nssv_noexcept // (2) { return nonstd::sv_lite::u16string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +nssv_constexpr nonstd::sv_lite::u32string_view operator ""_sv( const char32_t* str, size_t len ) nssv_noexcept // (3) { return nonstd::sv_lite::u32string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +nssv_constexpr nonstd::sv_lite::wstring_view operator ""_sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) { return nonstd::sv_lite::wstring_view{ str, len }; } @@ -2344,7 +2437,7 @@ namespace std { #define SIMDJSON_SIMDJSON_VERSION_H /** The version of simdjson being used (major.minor.revision) */ -#define SIMDJSON_VERSION "3.6.4" +#define SIMDJSON_VERSION "3.12.3" namespace simdjson { enum { @@ -2355,11 +2448,11 @@ enum { /** * The minor version (major.MINOR.revision) of simdjson being used. */ - SIMDJSON_VERSION_MINOR = 6, + SIMDJSON_VERSION_MINOR = 12, /** * The revision (major.minor.REVISION) of simdjson being used. */ - SIMDJSON_VERSION_REVISION = 4 + SIMDJSON_VERSION_REVISION = 3 }; } // namespace simdjson @@ -2401,13 +2494,14 @@ enum error_code { SUCCESS = 0, ///< No error CAPACITY, ///< This parser can't support a document that big MEMALLOC, ///< Error allocating memory, most likely out of memory - TAPE_ERROR, ///< Something went wrong, this is a generic error + TAPE_ERROR, ///< Something went wrong, this is a generic error. Fatal/unrecoverable error. DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation STRING_ERROR, ///< Problem while parsing a string T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' F_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'f' N_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'n' NUMBER_ERROR, ///< Problem while parsing a number + BIGINT_ERROR, ///< The integer value exceeds 64 bits UTF8_ERROR, ///< the input is not valid UTF-8 UNINITIALIZED, ///< unknown error, or uninitialized document EMPTY, ///< no structural element found @@ -2419,19 +2513,27 @@ enum error_code { INDEX_OUT_OF_BOUNDS, ///< JSON array index too large NO_SUCH_FIELD, ///< JSON field not found in object IO_ERROR, ///< Error reading a file - INVALID_JSON_POINTER, ///< Invalid JSON pointer reference + INVALID_JSON_POINTER, ///< Invalid JSON pointer syntax INVALID_URI_FRAGMENT, ///< Invalid URI fragment UNEXPECTED_ERROR, ///< indicative of a bug in simdjson PARSER_IN_USE, ///< parser is already in use. OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order (checked when SIMDJSON_DEVELOPMENT_CHECKS=1) INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. - INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. + INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. Fatal/unrecoverable error. SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. OUT_OF_BOUNDS, ///< Attempted to access location outside of document. TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input NUM_ERROR_CODES }; +/** + * Some errors are fatal and invalidate the document. This function returns true if the + * error is fatal. It returns true for TAPE_ERROR and INCOMPLETE_ARRAY_OR_OBJECT. + * Once a fatal error is encountered, the on-demand document is no longer valid and + * processing should stop. + */ + inline bool is_fatal(error_code error) noexcept; + /** * It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whether * we check for OUT_OF_ORDER_ITERATION. The logic behind it is that these errors only occurs when the code @@ -2466,7 +2568,7 @@ struct simdjson_error : public std::exception { */ simdjson_error(error_code error) noexcept : _error{error} { } /** The error message */ - const char *what() const noexcept { return error_message(error()); } + const char *what() const noexcept override { return error_message(error()); } /** The error code */ error_code error() const noexcept { return _error; } private: @@ -2698,6 +2800,140 @@ inline const std::string error_message(int error) noexcept; #endif // SIMDJSON_ERROR_H /* end file simdjson/error.h */ /* skipped duplicate #include "simdjson/portability.h" */ +/* including simdjson/concepts.h: #include "simdjson/concepts.h" */ +/* begin file simdjson/concepts.h */ +#ifndef SIMDJSON_CONCEPTS_H +#define SIMDJSON_CONCEPTS_H +#if SIMDJSON_SUPPORTS_DESERIALIZATION + +#include +#include + +namespace simdjson { +namespace concepts { + +namespace details { +#define SIMDJSON_IMPL_CONCEPT(name, method) \ + template \ + concept supports_##name = !std::is_const_v && requires { \ + typename std::remove_cvref_t::value_type; \ + requires requires(typename std::remove_cvref_t::value_type &&val, \ + T obj) { \ + obj.method(std::move(val)); \ + requires !requires { obj = std::move(val); }; \ + }; \ + }; + +SIMDJSON_IMPL_CONCEPT(emplace_back, emplace_back) +SIMDJSON_IMPL_CONCEPT(emplace, emplace) +SIMDJSON_IMPL_CONCEPT(push_back, push_back) +SIMDJSON_IMPL_CONCEPT(add, add) +SIMDJSON_IMPL_CONCEPT(push, push) +SIMDJSON_IMPL_CONCEPT(append, append) +SIMDJSON_IMPL_CONCEPT(insert, insert) +SIMDJSON_IMPL_CONCEPT(op_append, operator+=) + +#undef SIMDJSON_IMPL_CONCEPT +} // namespace details + + +template +concept string_view_like = std::is_convertible_v && + !std::is_convertible_v; + +template +concept constructible_from_string_view = std::is_constructible_v + && !std::is_same_v + && std::is_default_constructible_v; + +template +concept string_view_keyed_map = string_view_like + && requires(std::remove_cvref_t& m, typename M::key_type sv, typename M::mapped_type v) { + { m.emplace(sv, v) } -> std::same_as>; +}; + +/// Check if T is a container that we can append to, including: +/// std::vector, std::deque, std::list, std::string, ... +template +concept appendable_containers = + (details::supports_emplace_back || details::supports_emplace || + details::supports_push_back || details::supports_push || + details::supports_add || details::supports_append || + details::supports_insert) && !string_view_keyed_map; + +/// Insert into the container however possible +template +constexpr decltype(auto) emplace_one(T &vec, Args &&...args) { + if constexpr (details::supports_emplace_back) { + return vec.emplace_back(std::forward(args)...); + } else if constexpr (details::supports_emplace) { + return vec.emplace(std::forward(args)...); + } else if constexpr (details::supports_push_back) { + return vec.push_back(std::forward(args)...); + } else if constexpr (details::supports_push) { + return vec.push(std::forward(args)...); + } else if constexpr (details::supports_add) { + return vec.add(std::forward(args)...); + } else if constexpr (details::supports_append) { + return vec.append(std::forward(args)...); + } else if constexpr (details::supports_insert) { + return vec.insert(std::forward(args)...); + } else if constexpr (details::supports_op_append && sizeof...(Args) == 1) { + return vec.operator+=(std::forward(args)...); + } else { + static_assert(!sizeof(T *), + "We don't know how to add things to this container"); + } +} + +/// This checks if the container will return a reference to the newly added +/// element after an insert which for example `std::vector::emplace_back` does +/// since C++17; this will allow some optimizations. +template +concept returns_reference = appendable_containers && requires { + typename std::remove_cvref_t::reference; + requires requires(typename std::remove_cvref_t::value_type &&val, T obj) { + { + emplace_one(obj, std::move(val)) + } -> std::same_as::reference>; + }; +}; + +template +concept smart_pointer = requires(std::remove_cvref_t ptr) { + // Check if T has a member type named element_type + typename std::remove_cvref_t::element_type; + + // Check if T has a get() member function + { + ptr.get() + } -> std::same_as::element_type *>; + + // Check if T can be dereferenced + { *ptr } -> std::same_as::element_type &>; +}; + +template +concept optional_type = requires(std::remove_cvref_t obj) { + typename std::remove_cvref_t::value_type; + { obj.value() } -> std::same_as::value_type&>; + requires requires(typename std::remove_cvref_t::value_type &&val) { + obj.emplace(std::move(val)); + obj = std::move(val); + { + obj.value_or(val) + } -> std::convertible_to::value_type>; + }; + { static_cast(obj) } -> std::same_as; // convertible to bool +}; + + + +} // namespace concepts +} // namespace simdjson +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_CONCEPTS_H +/* end file simdjson/concepts.h */ /** * @brief The top level simdjson namespace, containing everything the library provides. @@ -2760,6 +2996,11 @@ enum class tape_type; #include namespace simdjson { + +inline bool is_fatal(error_code error) noexcept { + return error == TAPE_ERROR || error == INCOMPLETE_ARRAY_OR_OBJECT; +} + namespace internal { // We store the error code so we can validate the error message is associated with the right code struct error_code_info { @@ -3086,7 +3327,7 @@ class dom_parser_implementation { * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. * @@ -3103,7 +3344,7 @@ class dom_parser_implementation { * Unescape a NON-valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. * @@ -3157,14 +3398,14 @@ class dom_parser_implementation { * * @return Current capacity, in bytes. */ - simdjson_inline size_t capacity() const noexcept; + simdjson_pure simdjson_inline size_t capacity() const noexcept; /** * The maximum level of nested object and arrays supported by this parser. * * @return Maximum depth, in bytes. */ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -3205,11 +3446,11 @@ simdjson_inline dom_parser_implementation::dom_parser_implementation() noexcept simdjson_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; simdjson_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; -simdjson_inline size_t dom_parser_implementation::capacity() const noexcept { +simdjson_pure simdjson_inline size_t dom_parser_implementation::capacity() const noexcept { return _capacity; } -simdjson_inline size_t dom_parser_implementation::max_depth() const noexcept { +simdjson_pure simdjson_inline size_t dom_parser_implementation::max_depth() const noexcept { return _max_depth; } @@ -3281,7 +3522,7 @@ class implementation { * * @return the name of the implementation, e.g. "haswell", "westmere", "arm64". */ - virtual const std::string &name() const { return _name; } + virtual std::string name() const { return std::string(_name); } /** * The description of this implementation. @@ -3291,7 +3532,7 @@ class implementation { * * @return the description of the implementation, e.g. "Intel/AMD AVX2", "Intel/AMD SSE4.2", "ARM NEON". */ - virtual const std::string &description() const { return _description; } + virtual std::string description() const { return std::string(_description); } /** * The instruction sets this implementation is compiled against @@ -3367,18 +3608,19 @@ class implementation { _required_instruction_sets(required_instruction_sets) { } - virtual ~implementation()=default; +protected: + ~implementation() = default; private: /** * The name of this implementation. */ - const std::string _name; + std::string_view _name; /** * The description of this implementation. */ - const std::string _description; + std::string_view _description; /** * Instruction sets required for this implementation. @@ -3505,6 +3747,9 @@ struct padded_string final { * @param length the number of bytes to copy */ explicit inline padded_string(const char *data, size_t length) noexcept; +#ifdef __cpp_char8_t + explicit inline padded_string(const char8_t *data, size_t length) noexcept; +#endif /** * Create a new padded string by copying the given input. * @@ -3574,6 +3819,15 @@ struct padded_string final { /** * Load this padded string from a file. * + * ## Windows and Unicode + * + * Windows users who need to read files with non-ANSI characters in the + * name should set their code page to UTF-8 (65001) before calling this + * function. This should be the default with Windows 11 and better. + * Further, they may use the AreFileApisANSI function to determine whether + * the filename is interpreted using the ANSI or the system default OEM + * codepage, and they may call SetFileApisToOEM accordingly. + * * @return IO_ERROR on error. Be mindful that on some 32-bit systems, * the file size might be limited to 2 GB. * @@ -3615,7 +3869,10 @@ inline std::ostream& operator<<(std::ostream& out, simdjson_result &s) noexcept(false); #endif +/** + * Create a padded_string_view from a string. The string will be padded with SIMDJSON_PADDING + * space characters. The resulting padded_string_view will have a length equal to the original + * string. + * + * @param s The string. + * @return The padded string. + */ +inline padded_string_view pad(std::string& s) noexcept; } // namespace simdjson #endif // SIMDJSON_PADDED_STRING_VIEW_H @@ -3770,13 +4040,19 @@ namespace simdjson { inline padded_string_view::padded_string_view(const char* s, size_t len, size_t capacity) noexcept : std::string_view(s, len), _capacity(capacity) { + if(_capacity < len) { _capacity = len; } } inline padded_string_view::padded_string_view(const uint8_t* s, size_t len, size_t capacity) noexcept : padded_string_view(reinterpret_cast(s), len, capacity) { } - +#ifdef __cpp_char8_t +inline padded_string_view::padded_string_view(const char8_t* s, size_t len, size_t capacity) noexcept + : padded_string_view(reinterpret_cast(s), len, capacity) +{ +} +#endif inline padded_string_view::padded_string_view(const std::string &s) noexcept : std::string_view(s), _capacity(s.capacity()) { @@ -3785,6 +4061,7 @@ inline padded_string_view::padded_string_view(const std::string &s) noexcept inline padded_string_view::padded_string_view(std::string_view s, size_t capacity) noexcept : std::string_view(s), _capacity(capacity) { + if(_capacity < s.length()) { _capacity = s.length(); } } inline size_t padded_string_view::capacity() const noexcept { return _capacity; } @@ -3805,6 +4082,11 @@ inline bool padded_string_view::remove_utf8_bom() noexcept { inline std::ostream& operator<<(std::ostream& out, simdjson_result &s) noexcept(false) { return out << s.value(); } #endif +inline padded_string_view pad(std::string& s) noexcept { + const auto len = s.size(); + s.append(SIMDJSON_PADDING, ' '); + return padded_string_view(s.data(), len, s.size()); +} } // namespace simdjson @@ -3857,11 +4139,27 @@ inline padded_string::padded_string(const char *data, size_t length) noexcept if ((data != nullptr) && (data_ptr != nullptr)) { std::memcpy(data_ptr, data, length); } + if (data_ptr == nullptr) { + viable_size = 0; + } +} +#ifdef __cpp_char8_t +inline padded_string::padded_string(const char8_t *data, size_t length) noexcept + : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { + if ((data != nullptr) && (data_ptr != nullptr)) { + std::memcpy(data_ptr, reinterpret_cast(data), length); + } + if (data_ptr == nullptr) { + viable_size = 0; + } } +#endif // note: do not pass std::string arguments by value inline padded_string::padded_string(const std::string & str_ ) noexcept : viable_size(str_.size()), data_ptr(internal::allocate_padded_buffer(str_.size())) { - if (data_ptr != nullptr) { + if (data_ptr == nullptr) { + viable_size = 0; + } else { std::memcpy(data_ptr, str_.data(), str_.size()); } } @@ -3880,6 +4178,7 @@ inline padded_string::padded_string(std::string_view sv_) noexcept inline padded_string::padded_string(padded_string &&o) noexcept : viable_size(o.viable_size), data_ptr(o.data_ptr) { o.data_ptr = nullptr; // we take ownership + o.viable_size = 0; } inline padded_string &padded_string::operator=(padded_string &&o) noexcept { @@ -3974,10 +4273,14 @@ inline simdjson_result padded_string::load(std::string_view filen } // namespace simdjson -inline simdjson::padded_string operator "" _padded(const char *str, size_t len) { +inline simdjson::padded_string operator ""_padded(const char *str, size_t len) { return simdjson::padded_string(str, len); } - +#ifdef __cpp_char8_t +inline simdjson::padded_string operator ""_padded(const char8_t *str, size_t len) { + return simdjson::padded_string(reinterpret_cast(str), len); +} +#endif #endif // SIMDJSON_PADDED_STRING_INL_H /* end file simdjson/padded_string-inl.h */ /* skipped duplicate #include "simdjson/padded_string_view.h" */ @@ -4119,11 +4422,14 @@ class array { public: using value_type = element; using difference_type = std::ptrdiff_t; + using pointer = void; + using reference = value_type; + using iterator_category = std::forward_iterator_tag; /** * Get the actual value */ - inline value_type operator*() const noexcept; + inline reference operator*() const noexcept; /** * Get the next value. * @@ -4205,6 +4511,21 @@ class array { */ inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) const noexcept; + /** * Get the value at the given index. This function has linear-time complexity and * is equivalent to the following: @@ -4223,6 +4544,11 @@ class array { */ inline simdjson_result at(size_t index) const noexcept; + /** + * Implicitly convert object to element + */ + inline operator element() const noexcept; + private: simdjson_inline array(const internal::tape_ref &tape) noexcept; internal::tape_ref tape; @@ -4244,6 +4570,7 @@ struct simdjson_result : public internal::simdjson_result_base at_pointer(std::string_view json_pointer) const noexcept; + inline simdjson_result at_path(std::string_view json_path) const noexcept; inline simdjson_result at(size_t index) const noexcept; #if SIMDJSON_EXCEPTIONS @@ -4360,7 +4687,7 @@ class document { * The memory allocation is strict: you * can you use this function to increase * or lower the amount of allocated memory. - * Passsing zero clears the memory. + * Passing zero clears the memory. */ error_code allocate(size_t len) noexcept; /** @private Capacity in bytes, in terms @@ -4460,6 +4787,15 @@ class parser { * If the parser's current capacity is less than the file length, it will allocate enough capacity * to handle it (up to max_capacity). * + * ## Windows and Unicode + * + * Windows users who need to read files with non-ANSI characters in the + * name should set their code page to UTF-8 (65001) before calling this + * function. This should be the default with Windows 11 and better. + * Further, they may use the AreFileApisANSI function to determine whether + * the filename is interpreted using the ANSI or the system default OEM + * codepage, and they may call SetFileApisToOEM accordingly. + * * @param path The path to load. * @return The document, or an error: * - IO_ERROR if there was an error opening or reading the file. @@ -4570,6 +4906,22 @@ class parser { * simdjson::dom::parser parser; * simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false); * + * ### std::string references + * + * If you pass a mutable std::string reference (std::string&), the parser will seek to extend + * its capacity to SIMDJSON_PADDING bytes beyond the end of the string. + * + * Whenever you pass an std::string reference, the parser will access the bytes beyond the end of + * the string but before the end of the allocated memory (std::string::capacity()). + * If you are using a sanitizer that checks for reading uninitialized bytes or std::string's + * container-overflow checks, you may encounter sanitizer warnings. + * You can safely ignore these warnings. Or you can call simdjson::pad(std::string&) to pad the + * string with SIMDJSON_PADDING spaces: this function returns a simdjson::padding_string_view + * which can be be passed to the parser's parse function: + * + * std::string json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"; + * element doc = parser.parse(simdjson::pad(json)); + * * ### Parser Capacity * * If the parser's current capacity is less than len, it will allocate enough capacity @@ -4692,7 +5044,7 @@ class parser { * arrays or objects) MUST be separated with whitespace. * * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. - * Setting batch_size to excessively large or excesively small values may impact negatively the + * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * * ### Error Handling @@ -4786,7 +5138,7 @@ class parser { * arrays or objects) MUST be separated with whitespace. * * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. - * Setting batch_size to excessively large or excesively small values may impact negatively the + * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * * ### Error Handling @@ -4895,7 +5247,7 @@ class parser { * * @return Maximum depth, in bytes. */ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Set max_capacity. This is the largest document this parser can automatically support. @@ -4917,9 +5269,14 @@ class parser { /** * The parser instance can use threads when they are available to speed up some * operations. It is enabled by default. Changing this attribute will change the - * behavior of the parser for future operations. + * behavior of the parser for future operations. Set to true by default. */ bool threaded{true}; +#else + /** + * When SIMDJSON_THREADS_ENABLED is not defined, the parser instance cannot use threads. + */ + bool threaded{false}; #endif /** @private Use the new DOM API instead */ class Iterator; @@ -5633,7 +5990,7 @@ class element { * * @return The integer value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer - * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative + * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer does not fit in 64 bits or is negative */ inline operator uint64_t() const noexcept(false); /** @@ -5641,7 +5998,7 @@ class element { * * @return The integer value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer - * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits + * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer does not fit in 64 bits */ inline operator int64_t() const noexcept(false); /** @@ -5649,7 +6006,6 @@ class element { * * @return The double value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a number - * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative */ inline operator double() const noexcept(false); /** @@ -5713,6 +6069,8 @@ class element { * - INCORRECT_TYPE if this is not an object */ inline simdjson_result operator[](const char *key) const noexcept; + simdjson_result operator[](int) const noexcept = delete; + /** * Get the value associated with the given JSON pointer. We use the RFC 6901 @@ -5738,6 +6096,21 @@ class element { */ inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) const noexcept; + #ifndef SIMDJSON_DISABLE_DEPRECATED_API /** * @@ -5866,7 +6239,9 @@ struct simdjson_result : public internal::simdjson_result_base operator[](std::string_view key) const noexcept; simdjson_inline simdjson_result operator[](const char *key) const noexcept; + simdjson_result operator[](int) const noexcept = delete; simdjson_inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; + simdjson_inline simdjson_result at_path(const std::string_view json_path) const noexcept; [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] simdjson_inline simdjson_result at(const std::string_view json_pointer) const noexcept; simdjson_inline simdjson_result at(size_t index) const noexcept; @@ -5914,13 +6289,16 @@ class object { class iterator { public: - using value_type = key_value_pair; + using value_type = const key_value_pair; using difference_type = std::ptrdiff_t; + using pointer = void; + using reference = value_type; + using iterator_category = std::forward_iterator_tag; /** * Get the actual key/value pair */ - inline const value_type operator*() const noexcept; + inline reference operator*() const noexcept; /** * Get the next key/value pair. * @@ -6038,6 +6416,7 @@ class object { * - INCORRECT_TYPE if this is not an object */ inline simdjson_result operator[](const char *key) const noexcept; + simdjson_result operator[](int) const noexcept = delete; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 @@ -6064,6 +6443,21 @@ class object { */ inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) const noexcept; + /** * Get the value associated with the given key. * @@ -6093,6 +6487,11 @@ class object { */ inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + /** + * Implicitly convert object to element + */ + inline operator element() const noexcept; + private: simdjson_inline object(const internal::tape_ref &tape) noexcept; @@ -6131,7 +6530,9 @@ struct simdjson_result : public internal::simdjson_result_base operator[](std::string_view key) const noexcept; inline simdjson_result operator[](const char *key) const noexcept; + simdjson_result operator[](int) const noexcept = delete; inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + inline simdjson_result at_path(std::string_view json_path) const noexcept; inline simdjson_result at_key(std::string_view key) const noexcept; inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; @@ -6223,15 +6624,15 @@ class base_formatter { simdjson_inline void one_char(char c); simdjson_inline void call_print_newline() { - this->print_newline(); + static_cast(this)->print_newline(); } simdjson_inline void call_print_indents(size_t depth) { - this->print_indents(depth); + static_cast(this)->print_indents(depth); } simdjson_inline void call_print_space() { - this->print_space(); + static_cast(this)->print_space(); } protected: @@ -6426,46 +6827,92 @@ std::string prettify(simdjson_result x) { #endif /* end file simdjson/dom/serialization.h */ -// Deprecated API -/* including simdjson/dom/jsonparser.h: #include "simdjson/dom/jsonparser.h" */ -/* begin file simdjson/dom/jsonparser.h */ -// TODO Remove this -- deprecated API and files +// Inline functions +/* including simdjson/dom/array-inl.h: #include "simdjson/dom/array-inl.h" */ +/* begin file simdjson/dom/array-inl.h */ +#ifndef SIMDJSON_ARRAY_INL_H +#define SIMDJSON_ARRAY_INL_H -#ifndef SIMDJSON_DOM_JSONPARSER_H -#define SIMDJSON_DOM_JSONPARSER_H +#include /* skipped duplicate #include "simdjson/dom/base.h" */ -/* skipped duplicate #include "simdjson/dom/parser.h" */ +/* skipped duplicate #include "simdjson/dom/array.h" */ /* skipped duplicate #include "simdjson/dom/element.h" */ +/* skipped duplicate #include "simdjson/error-inl.h" */ +/* including simdjson/jsonpathutil.h: #include "simdjson/jsonpathutil.h" */ +/* begin file simdjson/jsonpathutil.h */ +#ifndef SIMDJSON_JSONPATHUTIL_H +#define SIMDJSON_JSONPATHUTIL_H -/* including simdjson/dom/parser-inl.h: #include "simdjson/dom/parser-inl.h" */ -/* begin file simdjson/dom/parser-inl.h */ -#ifndef SIMDJSON_PARSER_INL_H -#define SIMDJSON_PARSER_INL_H +#include +#include -/* skipped duplicate #include "simdjson/dom/base.h" */ -/* skipped duplicate #include "simdjson/dom/document_stream.h" */ -/* skipped duplicate #include "simdjson/implementation.h" */ -/* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ +namespace simdjson { +/** + * Converts JSONPath to JSON Pointer. + * @param json_path The JSONPath string to be converted. + * @return A string containing the equivalent JSON Pointer. + */ +inline std::string json_path_to_pointer_conversion(std::string_view json_path) { + size_t i = 0; -/* skipped duplicate #include "simdjson/error-inl.h" */ -/* skipped duplicate #include "simdjson/padded_string-inl.h" */ -/* including simdjson/dom/document_stream-inl.h: #include "simdjson/dom/document_stream-inl.h" */ -/* begin file simdjson/dom/document_stream-inl.h */ -#ifndef SIMDJSON_DOCUMENT_STREAM_INL_H -#define SIMDJSON_DOCUMENT_STREAM_INL_H + // if JSONPath starts with $, skip it + if (!json_path.empty() && json_path.front() == '$') { + i = 1; + } + if (json_path.empty() || (json_path[i] != '.' && + json_path[i] != '[')) { + return "-1"; // This is just a sentinel value, the caller should check for this and return an error. + } + + std::string result; + // Reserve space to reduce allocations, adjusting for potential increases due + // to escaping. + result.reserve(json_path.size() * 2); + + while (i < json_path.length()) { + if (json_path[i] == '.') { + result += '/'; + } else if (json_path[i] == '[') { + result += '/'; + ++i; // Move past the '[' + while (i < json_path.length() && json_path[i] != ']') { + if (json_path[i] == '~') { + result += "~0"; + } else if (json_path[i] == '/') { + result += "~1"; + } else { + result += json_path[i]; + } + ++i; + } + if (i == json_path.length() || json_path[i] != ']') { + return "-1"; // Using sentinel value that will be handled as an error by the caller. + } + } else { + if (json_path[i] == '~') { + result += "~0"; + } else if (json_path[i] == '/') { + result += "~1"; + } else { + result += json_path[i]; + } + } + ++i; + } -/* skipped duplicate #include "simdjson/dom/base.h" */ -/* skipped duplicate #include "simdjson/dom/document_stream.h" */ -/* including simdjson/dom/element-inl.h: #include "simdjson/dom/element-inl.h" */ -/* begin file simdjson/dom/element-inl.h */ -#ifndef SIMDJSON_ELEMENT_INL_H -#define SIMDJSON_ELEMENT_INL_H + return result; +} +} // namespace simdjson +#endif // SIMDJSON_JSONPATHUTIL_H +/* end file simdjson/jsonpathutil.h */ +/* including simdjson/internal/tape_ref-inl.h: #include "simdjson/internal/tape_ref-inl.h" */ +/* begin file simdjson/internal/tape_ref-inl.h */ +#ifndef SIMDJSON_TAPE_REF_INL_H +#define SIMDJSON_TAPE_REF_INL_H -/* skipped duplicate #include "simdjson/dom/base.h" */ -/* skipped duplicate #include "simdjson/dom/element.h" */ /* skipped duplicate #include "simdjson/dom/document.h" */ -/* skipped duplicate #include "simdjson/dom/object.h" */ +/* skipped duplicate #include "simdjson/internal/tape_ref.h" */ /* including simdjson/internal/tape_type.h: #include "simdjson/internal/tape_type.h" */ /* begin file simdjson/internal/tape_type.h */ #ifndef SIMDJSON_INTERNAL_TAPE_TYPE_H @@ -6498,6 +6945,301 @@ enum class tape_type { #endif // SIMDJSON_INTERNAL_TAPE_TYPE_H /* end file simdjson/internal/tape_type.h */ +#include + +namespace simdjson { +namespace internal { + +constexpr const uint64_t JSON_VALUE_MASK = 0x00FFFFFFFFFFFFFF; +constexpr const uint32_t JSON_COUNT_MASK = 0xFFFFFF; + +// +// tape_ref inline implementation +// +simdjson_inline tape_ref::tape_ref() noexcept : doc{nullptr}, json_index{0} {} +simdjson_inline tape_ref::tape_ref(const dom::document *_doc, size_t _json_index) noexcept : doc{_doc}, json_index{_json_index} {} + + +simdjson_inline bool tape_ref::is_document_root() const noexcept { + return json_index == 1; // should we ever change the structure of the tape, this should get updated. +} +simdjson_inline bool tape_ref::usable() const noexcept { + return doc != nullptr; // when the document pointer is null, this tape_ref is uninitialized (should not be accessed). +} +// Some value types have a specific on-tape word value. It can be faster +// to check the type by doing a word-to-word comparison instead of extracting the +// most significant 8 bits. + +simdjson_inline bool tape_ref::is_double() const noexcept { + constexpr uint64_t tape_double = uint64_t(tape_type::DOUBLE)<<56; + return doc->tape[json_index] == tape_double; +} +simdjson_inline bool tape_ref::is_int64() const noexcept { + constexpr uint64_t tape_int64 = uint64_t(tape_type::INT64)<<56; + return doc->tape[json_index] == tape_int64; +} +simdjson_inline bool tape_ref::is_uint64() const noexcept { + constexpr uint64_t tape_uint64 = uint64_t(tape_type::UINT64)<<56; + return doc->tape[json_index] == tape_uint64; +} +simdjson_inline bool tape_ref::is_false() const noexcept { + constexpr uint64_t tape_false = uint64_t(tape_type::FALSE_VALUE)<<56; + return doc->tape[json_index] == tape_false; +} +simdjson_inline bool tape_ref::is_true() const noexcept { + constexpr uint64_t tape_true = uint64_t(tape_type::TRUE_VALUE)<<56; + return doc->tape[json_index] == tape_true; +} +simdjson_inline bool tape_ref::is_null_on_tape() const noexcept { + constexpr uint64_t tape_null = uint64_t(tape_type::NULL_VALUE)<<56; + return doc->tape[json_index] == tape_null; +} + +inline size_t tape_ref::after_element() const noexcept { + switch (tape_ref_type()) { + case tape_type::START_ARRAY: + case tape_type::START_OBJECT: + return matching_brace_index(); + case tape_type::UINT64: + case tape_type::INT64: + case tape_type::DOUBLE: + return json_index + 2; + default: + return json_index + 1; + } +} +simdjson_inline tape_type tape_ref::tape_ref_type() const noexcept { + return static_cast(doc->tape[json_index] >> 56); +} +simdjson_inline uint64_t internal::tape_ref::tape_value() const noexcept { + return doc->tape[json_index] & internal::JSON_VALUE_MASK; +} +simdjson_inline uint32_t internal::tape_ref::matching_brace_index() const noexcept { + return uint32_t(doc->tape[json_index]); +} +simdjson_inline uint32_t internal::tape_ref::scope_count() const noexcept { + return uint32_t((doc->tape[json_index] >> 32) & internal::JSON_COUNT_MASK); +} + +template +simdjson_inline T tape_ref::next_tape_value() const noexcept { + static_assert(sizeof(T) == sizeof(uint64_t), "next_tape_value() template parameter must be 64-bit"); + // Though the following is tempting... + // return *reinterpret_cast(&doc->tape[json_index + 1]); + // It is not generally safe. It is safer, and often faster to rely + // on memcpy. Yes, it is uglier, but it is also encapsulated. + T x; + std::memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t)); + return x; +} + +simdjson_inline uint32_t internal::tape_ref::get_string_length() const noexcept { + size_t string_buf_index = size_t(tape_value()); + uint32_t len; + std::memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); + return len; +} + +simdjson_inline const char * internal::tape_ref::get_c_str() const noexcept { + size_t string_buf_index = size_t(tape_value()); + return reinterpret_cast(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); +} + +inline std::string_view internal::tape_ref::get_string_view() const noexcept { + return std::string_view( + get_c_str(), + get_string_length() + ); +} + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_TAPE_REF_INL_H +/* end file simdjson/internal/tape_ref-inl.h */ + +#include + +namespace simdjson { + +// +// simdjson_result inline implementation +// +simdjson_inline simdjson_result::simdjson_result() noexcept + : internal::simdjson_result_base() {} +simdjson_inline simdjson_result::simdjson_result(dom::array value) noexcept + : internal::simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base(error) {} + +#if SIMDJSON_EXCEPTIONS + +inline dom::array::iterator simdjson_result::begin() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); +} +inline dom::array::iterator simdjson_result::end() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); +} +inline size_t simdjson_result::size() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.size(); +} + +#endif // SIMDJSON_EXCEPTIONS + +inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) const noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + + inline simdjson_result simdjson_result::at_path(std::string_view json_path) const noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); + } + +inline simdjson_result simdjson_result::at(size_t index) const noexcept { + if (error()) { return error(); } + return first.at(index); +} + +namespace dom { + +// +// array inline implementation +// +simdjson_inline array::array() noexcept : tape{} {} +simdjson_inline array::array(const internal::tape_ref &_tape) noexcept : tape{_tape} {} +inline array::iterator array::begin() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return internal::tape_ref(tape.doc, tape.json_index + 1); +} +inline array::iterator array::end() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return internal::tape_ref(tape.doc, tape.after_element() - 1); +} +inline size_t array::size() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return tape.scope_count(); +} +inline size_t array::number_of_slots() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return tape.matching_brace_index() - tape.json_index; +} +inline simdjson_result array::at_pointer(std::string_view json_pointer) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + if(json_pointer.empty()) { // an empty string means that we return the current node + return element(this->tape); // copy the current node + } else if(json_pointer[0] != '/') { // otherwise there is an error + return INVALID_JSON_POINTER; + } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + + // Get the child + auto child = array(tape).at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline simdjson_result array::at_path(std::string_view json_path) const noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} + +inline simdjson_result array::at(size_t index) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + size_t i=0; + for (auto element : *this) { + if (i == index) { return element; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +inline array::operator element() const noexcept { + return element(tape); +} + +// +// array::iterator inline implementation +// +simdjson_inline array::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +inline element array::iterator::operator*() const noexcept { + return element(tape); +} +inline array::iterator& array::iterator::operator++() noexcept { + tape.json_index = tape.after_element(); + return *this; +} +inline array::iterator array::iterator::operator++(int) noexcept { + array::iterator out = *this; + ++*this; + return out; +} +inline bool array::iterator::operator!=(const array::iterator& other) const noexcept { + return tape.json_index != other.tape.json_index; +} +inline bool array::iterator::operator==(const array::iterator& other) const noexcept { + return tape.json_index == other.tape.json_index; +} +inline bool array::iterator::operator<(const array::iterator& other) const noexcept { + return tape.json_index < other.tape.json_index; +} +inline bool array::iterator::operator<=(const array::iterator& other) const noexcept { + return tape.json_index <= other.tape.json_index; +} +inline bool array::iterator::operator>=(const array::iterator& other) const noexcept { + return tape.json_index >= other.tape.json_index; +} +inline bool array::iterator::operator>(const array::iterator& other) const noexcept { + return tape.json_index > other.tape.json_index; +} + +} // namespace dom + + +} // namespace simdjson + +/* including simdjson/dom/element-inl.h: #include "simdjson/dom/element-inl.h" */ +/* begin file simdjson/dom/element-inl.h */ +#ifndef SIMDJSON_ELEMENT_INL_H +#define SIMDJSON_ELEMENT_INL_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/element.h" */ +/* skipped duplicate #include "simdjson/dom/document.h" */ +/* skipped duplicate #include "simdjson/dom/object.h" */ +/* skipped duplicate #include "simdjson/internal/tape_type.h" */ + /* including simdjson/dom/object-inl.h: #include "simdjson/dom/object-inl.h" */ /* begin file simdjson/dom/object-inl.h */ #ifndef SIMDJSON_OBJECT_INL_H @@ -6509,6 +7251,7 @@ enum class tape_type { /* skipped duplicate #include "simdjson/dom/element-inl.h" */ /* skipped duplicate #include "simdjson/error-inl.h" */ +/* skipped duplicate #include "simdjson/jsonpathutil.h" */ #include @@ -6536,6 +7279,11 @@ inline simdjson_result simdjson_result::at_pointer(st if (error()) { return error(); } return first.at_pointer(json_pointer); } +inline simdjson_result simdjson_result::at_path(std::string_view json_path) const noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} inline simdjson_result simdjson_result::at_key(std::string_view key) const noexcept { if (error()) { return error(); } return first.at_key(key); @@ -6633,6 +7381,12 @@ inline simdjson_result object::at_pointer(std::string_view json_pointer return child; } +inline simdjson_result object::at_path(std::string_view json_path) const noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} + inline simdjson_result object::at_key(std::string_view key) const noexcept { iterator end_field = end(); for (iterator field = begin(); field != end_field; ++field) { @@ -6655,6 +7409,10 @@ inline simdjson_result object::at_key_case_insensitive(std::string_view return NO_SUCH_FIELD; } +inline object::operator element() const noexcept { + return element(tape); +} + // // object::iterator inline implementation // @@ -6761,6 +7519,7 @@ static_assert(std::ranges::sized_range #include @@ -6874,6 +7633,11 @@ simdjson_inline simdjson_result simdjson_result::at_ if (error()) { return error(); } return first.at_pointer(json_pointer); } +simdjson_inline simdjson_result simdjson_result::at_path(const std::string_view json_path) const noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} #ifndef SIMDJSON_DISABLE_DEPRECATED_API [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] simdjson_inline simdjson_result simdjson_result::at(const std::string_view json_pointer) const noexcept { @@ -7127,6 +7891,23 @@ inline simdjson_result element::operator[](const char *key) const noexc return at_key(key); } +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + return false; + } + return true; +} + inline simdjson_result element::at_pointer(std::string_view json_pointer) const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch (tape.tape_ref_type()) { @@ -7135,7 +7916,10 @@ inline simdjson_result element::at_pointer(std::string_view json_pointe case internal::tape_type::START_ARRAY: return array(tape).at_pointer(json_pointer); default: { - if(!json_pointer.empty()) { // a non-empty string is invalid on an atom + if (!json_pointer.empty()) { // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } return INVALID_JSON_POINTER; } // an empty string means that we return the current node @@ -7144,6 +7928,11 @@ inline simdjson_result element::at_pointer(std::string_view json_pointe } } } +inline simdjson_result element::at_path(std::string_view json_path) const noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} #ifndef SIMDJSON_DISABLE_DEPRECATED_API [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] inline simdjson_result element::at(std::string_view json_pointer) const noexcept { @@ -7204,7 +7993,287 @@ inline std::ostream& operator<<(std::ostream& out, element_type type) { #endif // SIMDJSON_ELEMENT_INL_H /* end file simdjson/dom/element-inl.h */ -/* skipped duplicate #include "simdjson/dom/parser-inl.h" */ + +#if defined(__cpp_lib_ranges) +static_assert(std::ranges::view); +static_assert(std::ranges::sized_range); +#if SIMDJSON_EXCEPTIONS +static_assert(std::ranges::view>); +static_assert(std::ranges::sized_range>); +#endif // SIMDJSON_EXCEPTIONS +#endif // defined(__cpp_lib_ranges) + +#endif // SIMDJSON_ARRAY_INL_H +/* end file simdjson/dom/array-inl.h */ +/* including simdjson/dom/document_stream-inl.h: #include "simdjson/dom/document_stream-inl.h" */ +/* begin file simdjson/dom/document_stream-inl.h */ +#ifndef SIMDJSON_DOCUMENT_STREAM_INL_H +#define SIMDJSON_DOCUMENT_STREAM_INL_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/document_stream.h" */ +/* skipped duplicate #include "simdjson/dom/element-inl.h" */ +/* including simdjson/dom/parser-inl.h: #include "simdjson/dom/parser-inl.h" */ +/* begin file simdjson/dom/parser-inl.h */ +#ifndef SIMDJSON_PARSER_INL_H +#define SIMDJSON_PARSER_INL_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/document_stream.h" */ +/* skipped duplicate #include "simdjson/implementation.h" */ +/* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ + +/* skipped duplicate #include "simdjson/error-inl.h" */ +/* skipped duplicate #include "simdjson/padded_string-inl.h" */ +/* skipped duplicate #include "simdjson/dom/document_stream-inl.h" */ +/* skipped duplicate #include "simdjson/dom/element-inl.h" */ + +#include +#include /* memcmp */ + +namespace simdjson { +namespace dom { + +// +// parser inline implementation +// +simdjson_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity}, + loaded_bytes(nullptr) { +} +simdjson_inline parser::parser(parser &&other) noexcept = default; +simdjson_inline parser &parser::operator=(parser &&other) noexcept = default; + +inline bool parser::is_valid() const noexcept { return valid; } +inline int parser::get_error_code() const noexcept { return error; } +inline std::string parser::get_error_message() const noexcept { return error_message(error); } + +inline bool parser::dump_raw_tape(std::ostream &os) const noexcept { + return valid ? doc.dump_raw_tape(os) : false; +} + +inline simdjson_result parser::read_file(const std::string &path) noexcept { + // Open the file + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + std::FILE *fp = std::fopen(path.c_str(), "rb"); + SIMDJSON_POP_DISABLE_WARNINGS + + if (fp == nullptr) { + return IO_ERROR; + } + + // Get the file size + int ret; +#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS + ret = _fseeki64(fp, 0, SEEK_END); +#else + ret = std::fseek(fp, 0, SEEK_END); +#endif // _WIN64 + if(ret < 0) { + std::fclose(fp); + return IO_ERROR; + } +#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS + __int64 len = _ftelli64(fp); + if(len == -1L) { + std::fclose(fp); + return IO_ERROR; + } +#else + long len = std::ftell(fp); + if((len < 0) || (len == LONG_MAX)) { + std::fclose(fp); + return IO_ERROR; + } +#endif + + // Make sure we have enough capacity to load the file + if (_loaded_bytes_capacity < size_t(len)) { + loaded_bytes.reset( internal::allocate_padded_buffer(len) ); + if (!loaded_bytes) { + std::fclose(fp); + return MEMALLOC; + } + _loaded_bytes_capacity = len; + } + + // Read the string + std::rewind(fp); + size_t bytes_read = std::fread(loaded_bytes.get(), 1, len, fp); + if (std::fclose(fp) != 0 || bytes_read != size_t(len)) { + return IO_ERROR; + } + + return bytes_read; +} + +inline simdjson_result parser::load(const std::string &path) & noexcept { + return load_into_document(doc, path); +} + +inline simdjson_result parser::load_into_document(document& provided_doc, const std::string &path) & noexcept { + size_t len; + auto _error = read_file(path).get(len); + if (_error) { return _error; } + return parse_into_document(provided_doc, loaded_bytes.get(), len, false); +} + +inline simdjson_result parser::load_many(const std::string &path, size_t batch_size) noexcept { + size_t len; + auto _error = read_file(path).get(len); + if (_error) { return _error; } + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + return document_stream(*this, reinterpret_cast(loaded_bytes.get()), len, batch_size); +} + +inline simdjson_result parser::parse_into_document(document& provided_doc, const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { + // Important: we need to ensure that document has enough capacity. + // Important: It is possible that provided_doc is actually the internal 'doc' within the parser!!! + error_code _error = ensure_capacity(provided_doc, len); + if (_error) { return _error; } + if (realloc_if_needed) { + // Make sure we have enough capacity to copy len bytes + if (!loaded_bytes || _loaded_bytes_capacity < len) { + loaded_bytes.reset( internal::allocate_padded_buffer(len) ); + if (!loaded_bytes) { + return MEMALLOC; + } + _loaded_bytes_capacity = len; + } + std::memcpy(static_cast(loaded_bytes.get()), buf, len); + buf = reinterpret_cast(loaded_bytes.get()); + } + + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; + } + _error = implementation->parse(buf, len, provided_doc); + + if (_error) { return _error; } + + return provided_doc.root(); +} + +simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const char *buf, size_t len, bool realloc_if_needed) & noexcept { + return parse_into_document(provided_doc, reinterpret_cast(buf), len, realloc_if_needed); +} +simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const std::string &s) & noexcept { + return parse_into_document(provided_doc, s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); +} +simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const padded_string &s) & noexcept { + return parse_into_document(provided_doc, s.data(), s.length(), false); +} + + +inline simdjson_result parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { + return parse_into_document(doc, buf, len, realloc_if_needed); +} + +simdjson_inline simdjson_result parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept { + return parse(reinterpret_cast(buf), len, realloc_if_needed); +} +simdjson_inline simdjson_result parser::parse(const std::string &s) & noexcept { + return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); +} +simdjson_inline simdjson_result parser::parse(const padded_string &s) & noexcept { + return parse(s.data(), s.length(), false); +} +simdjson_inline simdjson_result parser::parse(const padded_string_view &v) & noexcept { + return parse(v.data(), v.length(), false); +} + +inline simdjson_result parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; + } + return document_stream(*this, buf, len, batch_size); +} +inline simdjson_result parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept { + return parse_many(reinterpret_cast(buf), len, batch_size); +} +inline simdjson_result parser::parse_many(const std::string &s, size_t batch_size) noexcept { + return parse_many(s.data(), s.length(), batch_size); +} +inline simdjson_result parser::parse_many(const padded_string &s, size_t batch_size) noexcept { + return parse_many(s.data(), s.length(), batch_size); +} + +simdjson_inline size_t parser::capacity() const noexcept { + return implementation ? implementation->capacity() : 0; +} +simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { + return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH; +} + +simdjson_warn_unused +inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept { + // + // Reallocate implementation if needed + // + error_code err; + if (implementation) { + err = implementation->allocate(capacity, max_depth); + } else { + err = simdjson::get_active_implementation()->create_dom_parser_implementation(capacity, max_depth, implementation); + } + if (err) { return err; } + return SUCCESS; +} + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +simdjson_warn_unused +inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept { + return !allocate(capacity, max_depth); +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept { + return ensure_capacity(doc, desired_capacity); +} + + +inline error_code parser::ensure_capacity(document& target_document, size_t desired_capacity) noexcept { + // 1. It is wasteful to allocate a document and a parser for documents spanning less than MINIMAL_DOCUMENT_CAPACITY bytes. + // 2. If we allow desired_capacity = 0 then it is possible to exit this function with implementation == nullptr. + if(desired_capacity < MINIMAL_DOCUMENT_CAPACITY) { desired_capacity = MINIMAL_DOCUMENT_CAPACITY; } + // If we don't have enough capacity, (try to) automatically bump it. + // If the document needs allocation, do it too. + // Both in one if statement to minimize unlikely branching. + // + // Note: we must make sure that this function is called if capacity() == 0. We do so because we + // ensure that desired_capacity > 0. + if (simdjson_unlikely(capacity() < desired_capacity || target_document.capacity() < desired_capacity)) { + if (desired_capacity > max_capacity()) { + return error = CAPACITY; + } + error_code err1 = target_document.capacity() < desired_capacity ? target_document.allocate(desired_capacity) : SUCCESS; + error_code err2 = capacity() < desired_capacity ? allocate(desired_capacity, max_depth()) : SUCCESS; + if(err1 != SUCCESS) { return error = err1; } + if(err2 != SUCCESS) { return error = err2; } + } + return SUCCESS; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity > MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = MINIMAL_DOCUMENT_CAPACITY; + } +} + +} // namespace dom +} // namespace simdjson + +#endif // SIMDJSON_PARSER_INL_H +/* end file simdjson/dom/parser-inl.h */ /* skipped duplicate #include "simdjson/error-inl.h" */ /* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ @@ -7423,7 +8492,11 @@ simdjson_inline std::string_view document_stream::iterator::source() const noexc return std::string_view(start, next_doc_index - current_index() + 1); } else { size_t next_doc_index = stream->batch_start + stream->parser->implementation->structural_indexes[stream->parser->implementation->next_structural_index]; - return std::string_view(reinterpret_cast(stream->buf) + current_index(), next_doc_index - current_index() - 1); + size_t svlen = next_doc_index - current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); } } @@ -7543,11369 +8616,10229 @@ simdjson_inline dom::document_stream::iterator simdjson_result -#include /* memcmp */ +/* skipped duplicate #include "simdjson/base.h" */ +#include +#include +#include namespace simdjson { -namespace dom { +namespace internal { -// -// parser inline implementation -// -simdjson_inline parser::parser(size_t max_capacity) noexcept - : _max_capacity{max_capacity}, - loaded_bytes(nullptr) { -} -simdjson_inline parser::parser(parser &&other) noexcept = default; -simdjson_inline parser &parser::operator=(parser &&other) noexcept = default; +inline std::ostream& operator<<(std::ostream& out, const escape_json_string &str); -inline bool parser::is_valid() const noexcept { return valid; } -inline int parser::get_error_code() const noexcept { return error; } -inline std::string parser::get_error_message() const noexcept { return error_message(error); } +class escape_json_string { +public: + escape_json_string(std::string_view _str) noexcept : str{_str} {} + operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); } +private: + std::string_view str; + friend std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped); +}; -inline bool parser::dump_raw_tape(std::ostream &os) const noexcept { - return valid ? doc.dump_raw_tape(os) : false; +inline std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped) { + for (size_t i=0; i(unescaped.str[i]) <= 0x1F) { + // TODO can this be done once at the beginning, or will it mess up << char? + std::ios::fmtflags f(out.flags()); + out << "\\u" << std::hex << std::setw(4) << std::setfill('0') << int(unescaped.str[i]); + out.flags(f); + } else { + out << unescaped.str[i]; + } + } + } + return out; } -inline simdjson_result parser::read_file(const std::string &path) noexcept { - // Open the file - SIMDJSON_PUSH_DISABLE_WARNINGS - SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe - std::FILE *fp = std::fopen(path.c_str(), "rb"); - SIMDJSON_POP_DISABLE_WARNINGS - - if (fp == nullptr) { - return IO_ERROR; - } +} // namespace internal +} // namespace simdjson - // Get the file size - int ret; -#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS - ret = _fseeki64(fp, 0, SEEK_END); -#else - ret = std::fseek(fp, 0, SEEK_END); -#endif // _WIN64 - if(ret < 0) { - std::fclose(fp); - return IO_ERROR; - } -#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS - __int64 len = _ftelli64(fp); - if(len == -1L) { - std::fclose(fp); - return IO_ERROR; - } -#else - long len = std::ftell(fp); - if((len < 0) || (len == LONG_MAX)) { - std::fclose(fp); - return IO_ERROR; - } -#endif +#endif // SIMDJSON_INTERNAL_JSONFORMATUTILS_H +/* end file simdjson/internal/jsonformatutils.h */ - // Make sure we have enough capacity to load the file - if (_loaded_bytes_capacity < size_t(len)) { - loaded_bytes.reset( internal::allocate_padded_buffer(len) ); - if (!loaded_bytes) { - std::fclose(fp); - return MEMALLOC; - } - _loaded_bytes_capacity = len; - } +#include - // Read the string - std::rewind(fp); - size_t bytes_read = std::fread(loaded_bytes.get(), 1, len, fp); - if (std::fclose(fp) != 0 || bytes_read != size_t(len)) { - return IO_ERROR; - } +namespace simdjson { +namespace dom { - return bytes_read; +// +// document inline implementation +// +inline element document::root() const noexcept { + return element(internal::tape_ref(this, 1)); } - -inline simdjson_result parser::load(const std::string &path) & noexcept { - return load_into_document(doc, path); +simdjson_warn_unused +inline size_t document::capacity() const noexcept { + return allocated_capacity; } -inline simdjson_result parser::load_into_document(document& provided_doc, const std::string &path) & noexcept { - size_t len; - auto _error = read_file(path).get(len); - if (_error) { return _error; } - return parse_into_document(provided_doc, loaded_bytes.get(), len, false); -} +simdjson_warn_unused +inline error_code document::allocate(size_t capacity) noexcept { + if (capacity == 0) { + string_buf.reset(); + tape.reset(); + allocated_capacity = 0; + return SUCCESS; + } -inline simdjson_result parser::load_many(const std::string &path, size_t batch_size) noexcept { - size_t len; - auto _error = read_file(path).get(len); - if (_error) { return _error; } - if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } - return document_stream(*this, reinterpret_cast(loaded_bytes.get()), len, batch_size); + // a pathological input like "[[[[..." would generate capacity tape elements, so + // need a capacity of at least capacity + 1, but it is also possible to do + // worse with "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6" + //where capacity + 1 tape elements are + // generated, see issue https://github.com/simdjson/simdjson/issues/345 + size_t tape_capacity = SIMDJSON_ROUNDUP_N(capacity + 3, 64); + // a document with only zero-length strings... could have capacity/3 string + // and we would need capacity/3 * 5 bytes on the string buffer + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset( new (std::nothrow) uint8_t[string_capacity]); + tape.reset(new (std::nothrow) uint64_t[tape_capacity]); + if(!(string_buf && tape)) { + allocated_capacity = 0; + string_buf.reset(); + tape.reset(); + return MEMALLOC; + } + // Technically the allocated_capacity might be larger than capacity + // so the next line is pessimistic. + allocated_capacity = capacity; + return SUCCESS; } -inline simdjson_result parser::parse_into_document(document& provided_doc, const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { - // Important: we need to ensure that document has enough capacity. - // Important: It is possible that provided_doc is actually the internal 'doc' within the parser!!! - error_code _error = ensure_capacity(provided_doc, len); - if (_error) { return _error; } - if (realloc_if_needed) { - // Make sure we have enough capacity to copy len bytes - if (!loaded_bytes || _loaded_bytes_capacity < len) { - loaded_bytes.reset( internal::allocate_padded_buffer(len) ); - if (!loaded_bytes) { - return MEMALLOC; +inline bool document::dump_raw_tape(std::ostream &os) const noexcept { + uint32_t string_length; + size_t tape_idx = 0; + uint64_t tape_val = tape[tape_idx]; + uint8_t type = uint8_t(tape_val >> 56); + os << tape_idx << " : " << type; + tape_idx++; + size_t how_many = 0; + if (type == 'r') { + how_many = size_t(tape_val & internal::JSON_VALUE_MASK); + } else { + // Error: no starting root node? + return false; + } + os << "\t// pointing to " << how_many << " (right after last node)\n"; + uint64_t payload; + for (; tape_idx < how_many; tape_idx++) { + os << tape_idx << " : "; + tape_val = tape[tape_idx]; + payload = tape_val & internal::JSON_VALUE_MASK; + type = uint8_t(tape_val >> 56); + switch (type) { + case '"': // we have a string + os << "string \""; + std::memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t)); + os << internal::escape_json_string(std::string_view( + reinterpret_cast(string_buf.get() + payload + sizeof(uint32_t)), + string_length + )); + os << '"'; + os << '\n'; + break; + case 'l': // we have a long int + if (tape_idx + 1 >= how_many) { + return false; } - _loaded_bytes_capacity = len; + os << "integer " << static_cast(tape[++tape_idx]) << "\n"; + break; + case 'u': // we have a long uint + if (tape_idx + 1 >= how_many) { + return false; + } + os << "unsigned integer " << tape[++tape_idx] << "\n"; + break; + case 'd': // we have a double + os << "float "; + if (tape_idx + 1 >= how_many) { + return false; + } + double answer; + std::memcpy(&answer, &tape[++tape_idx], sizeof(answer)); + os << answer << '\n'; + break; + case 'n': // we have a null + os << "null\n"; + break; + case 't': // we have a true + os << "true\n"; + break; + case 'f': // we have a false + os << "false\n"; + break; + case '{': // we have an object + os << "{\t// pointing to next tape location " << uint32_t(payload) + << " (first node after the scope), " + << " saturated count " + << ((payload >> 32) & internal::JSON_COUNT_MASK)<< "\n"; + break; case '}': // we end an object + os << "}\t// pointing to previous tape location " << uint32_t(payload) + << " (start of the scope)\n"; + break; + case '[': // we start an array + os << "[\t// pointing to next tape location " << uint32_t(payload) + << " (first node after the scope), " + << " saturated count " + << ((payload >> 32) & internal::JSON_COUNT_MASK)<< "\n"; + break; + case ']': // we end an array + os << "]\t// pointing to previous tape location " << uint32_t(payload) + << " (start of the scope)\n"; + break; + case 'r': // we start and end with the root node + // should we be hitting the root node? + return false; + default: + return false; } - std::memcpy(static_cast(loaded_bytes.get()), buf, len); - buf = reinterpret_cast(loaded_bytes.get()); } + tape_val = tape[tape_idx]; + payload = tape_val & internal::JSON_VALUE_MASK; + type = uint8_t(tape_val >> 56); + os << tape_idx << " : " << type << "\t// pointing to " << payload + << " (start root)\n"; + return true; +} - if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { - buf += 3; - len -= 3; - } - _error = implementation->parse(buf, len, provided_doc); +} // namespace dom +} // namespace simdjson - if (_error) { return _error; } +#endif // SIMDJSON_DOCUMENT_INL_H +/* end file simdjson/dom/document-inl.h */ +/* skipped duplicate #include "simdjson/dom/element-inl.h" */ +/* skipped duplicate #include "simdjson/dom/object-inl.h" */ +/* skipped duplicate #include "simdjson/dom/parser-inl.h" */ +/* skipped duplicate #include "simdjson/internal/tape_ref-inl.h" */ +/* including simdjson/dom/serialization-inl.h: #include "simdjson/dom/serialization-inl.h" */ +/* begin file simdjson/dom/serialization-inl.h */ - return provided_doc.root(); -} +#ifndef SIMDJSON_SERIALIZATION_INL_H +#define SIMDJSON_SERIALIZATION_INL_H -simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const char *buf, size_t len, bool realloc_if_needed) & noexcept { - return parse_into_document(provided_doc, reinterpret_cast(buf), len, realloc_if_needed); -} -simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const std::string &s) & noexcept { - return parse_into_document(provided_doc, s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); -} -simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const padded_string &s) & noexcept { - return parse_into_document(provided_doc, s.data(), s.length(), false); -} +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/serialization.h" */ +/* skipped duplicate #include "simdjson/dom/parser.h" */ +/* skipped duplicate #include "simdjson/internal/tape_type.h" */ +/* skipped duplicate #include "simdjson/dom/array-inl.h" */ +/* skipped duplicate #include "simdjson/dom/object-inl.h" */ +/* skipped duplicate #include "simdjson/internal/tape_ref-inl.h" */ -inline simdjson_result parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { - return parse_into_document(doc, buf, len, realloc_if_needed); -} +#include -simdjson_inline simdjson_result parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept { - return parse(reinterpret_cast(buf), len, realloc_if_needed); -} -simdjson_inline simdjson_result parser::parse(const std::string &s) & noexcept { - return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); -} -simdjson_inline simdjson_result parser::parse(const padded_string &s) & noexcept { - return parse(s.data(), s.length(), false); -} -simdjson_inline simdjson_result parser::parse(const padded_string_view &v) & noexcept { - return parse(v.data(), v.length(), false); +namespace simdjson { +namespace dom { +inline bool parser::print_json(std::ostream &os) const noexcept { + if (!valid) { return false; } + simdjson::internal::string_builder<> sb; + sb.append(doc.root()); + std::string_view answer = sb.str(); + os << answer; + return true; } -inline simdjson_result parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept { - if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } - if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { - buf += 3; - len -= 3; - } - return document_stream(*this, buf, len, batch_size); -} -inline simdjson_result parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept { - return parse_many(reinterpret_cast(buf), len, batch_size); +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); } -inline simdjson_result parser::parse_many(const std::string &s, size_t batch_size) noexcept { - return parse_many(s.data(), s.length(), batch_size); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } -inline simdjson_result parser::parse_many(const padded_string &s, size_t batch_size) noexcept { - return parse_many(s.data(), s.length(), batch_size); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); } - -simdjson_inline size_t parser::capacity() const noexcept { - return implementation ? implementation->capacity() : 0; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } -simdjson_inline size_t parser::max_capacity() const noexcept { - return _max_capacity; +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); } -simdjson_inline size_t parser::max_depth() const noexcept { - return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } +#endif -simdjson_warn_unused -inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept { - // - // Reallocate implementation if needed - // - error_code err; - if (implementation) { - err = implementation->allocate(capacity, max_depth); +} // namespace dom + +/*** + * Number utility functions + **/ +namespace { +/**@private + * Escape sequence like \b or \u0001 + * We expect that most compilers will use 8 bytes for this data structure. + **/ +struct escape_sequence { + uint8_t length; + const char string[7]; // technically, we only ever need 6 characters, we pad to 8 +}; +/**@private + * This converts a signed integer into a character sequence. + * The caller is responsible for providing enough memory (at least + * 20 characters.) + * Though various runtime libraries provide itoa functions, + * it is not part of the C++ standard. The C++17 standard + * adds the to_chars functions which would do as well, but + * we want to support C++11. + */ +static char *fast_itoa(char *output, int64_t value) noexcept { + // This is a standard implementation of itoa. + char buffer[20]; + uint64_t value_positive; + // In general, negating a signed integer is unsafe. + if(value < 0) { + *output++ = '-'; + // Doing value_positive = -value; while avoiding + // undefined behavior warnings. + // It assumes two complement's which is universal at this + // point in time. + std::memcpy(&value_positive, &value, sizeof(value)); + value_positive = (~value_positive) + 1; // this is a negation } else { - err = simdjson::get_active_implementation()->create_dom_parser_implementation(capacity, max_depth, implementation); + value_positive = value; } - if (err) { return err; } - return SUCCESS; + // We work solely with value_positive. It *might* be easier + // for an optimizing compiler to deal with an unsigned variable + // as far as performance goes. + const char *const end_buffer = buffer + 20; + char *write_pointer = buffer + 19; + // A faster approach is possible if we expect large integers: + // unroll the loop (work in 100s, 1000s) and use some kind of + // memoization. + while(value_positive >= 10) { + *write_pointer-- = char('0' + (value_positive % 10)); + value_positive /= 10; + } + *write_pointer = char('0' + value_positive); + size_t len = end_buffer - write_pointer; + std::memcpy(output, write_pointer, len); + return output + len; } - -#ifndef SIMDJSON_DISABLE_DEPRECATED_API -simdjson_warn_unused -inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept { - return !allocate(capacity, max_depth); +/**@private + * This converts an unsigned integer into a character sequence. + * The caller is responsible for providing enough memory (at least + * 19 characters.) + * Though various runtime libraries provide itoa functions, + * it is not part of the C++ standard. The C++17 standard + * adds the to_chars functions which would do as well, but + * we want to support C++11. + */ +static char *fast_itoa(char *output, uint64_t value) noexcept { + // This is a standard implementation of itoa. + char buffer[20]; + const char *const end_buffer = buffer + 20; + char *write_pointer = buffer + 19; + // A faster approach is possible if we expect large integers: + // unroll the loop (work in 100s, 1000s) and use some kind of + // memoization. + while(value >= 10) { + *write_pointer-- = char('0' + (value % 10)); + value /= 10; + }; + *write_pointer = char('0' + value); + size_t len = end_buffer - write_pointer; + std::memcpy(output, write_pointer, len); + return output + len; } -#endif // SIMDJSON_DISABLE_DEPRECATED_API -inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept { - return ensure_capacity(doc, desired_capacity); -} +} // anonymous namespace +namespace internal { -inline error_code parser::ensure_capacity(document& target_document, size_t desired_capacity) noexcept { - // 1. It is wasteful to allocate a document and a parser for documents spanning less than MINIMAL_DOCUMENT_CAPACITY bytes. - // 2. If we allow desired_capacity = 0 then it is possible to exit this function with implementation == nullptr. - if(desired_capacity < MINIMAL_DOCUMENT_CAPACITY) { desired_capacity = MINIMAL_DOCUMENT_CAPACITY; } - // If we don't have enough capacity, (try to) automatically bump it. - // If the document needs allocation, do it too. - // Both in one if statement to minimize unlikely branching. - // - // Note: we must make sure that this function is called if capacity() == 0. We do so because we - // ensure that desired_capacity > 0. - if (simdjson_unlikely(capacity() < desired_capacity || target_document.capacity() < desired_capacity)) { - if (desired_capacity > max_capacity()) { - return error = CAPACITY; - } - error_code err1 = target_document.capacity() < desired_capacity ? target_document.allocate(desired_capacity) : SUCCESS; - error_code err2 = capacity() < desired_capacity ? allocate(desired_capacity, max_depth()) : SUCCESS; - if(err1 != SUCCESS) { return error = err1; } - if(err2 != SUCCESS) { return error = err2; } - } - return SUCCESS; -} +/*** + * Minifier/formatter code. + **/ -simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { - if(max_capacity > MINIMAL_DOCUMENT_CAPACITY) { - _max_capacity = max_capacity; - } else { - _max_capacity = MINIMAL_DOCUMENT_CAPACITY; - } +template +simdjson_inline void base_formatter::number(uint64_t x) { + char number_buffer[24]; + char *newp = fast_itoa(number_buffer, x); + buffer.insert(buffer.end(), number_buffer, newp); } -} // namespace dom -} // namespace simdjson - -#endif // SIMDJSON_PARSER_INL_H -/* end file simdjson/dom/parser-inl.h */ - -namespace simdjson { - -// -// C API (json_parse and build_parsed_json) declarations -// - -#ifndef SIMDJSON_DISABLE_DEPRECATED_API -[[deprecated("Use parser.parse() instead")]] -inline int json_parse(const uint8_t *buf, size_t len, dom::parser &parser, bool realloc_if_needed = true) noexcept { - error_code code = parser.parse(buf, len, realloc_if_needed).error(); - // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid - // bits in the parser instead of heeding the result code. The normal parser unsets those in - // anticipation of making the error code ephemeral. - // Here we put the code back into the parser, until we've removed this method. - parser.valid = code == SUCCESS; - parser.error = code; - return code; -} -[[deprecated("Use parser.parse() instead")]] -inline int json_parse(const char *buf, size_t len, dom::parser &parser, bool realloc_if_needed = true) noexcept { - error_code code = parser.parse(buf, len, realloc_if_needed).error(); - // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid - // bits in the parser instead of heeding the result code. The normal parser unsets those in - // anticipation of making the error code ephemeral. - // Here we put the code back into the parser, until we've removed this method. - parser.valid = code == SUCCESS; - parser.error = code; - return code; -} -[[deprecated("Use parser.parse() instead")]] -inline int json_parse(const std::string &s, dom::parser &parser, bool realloc_if_needed = true) noexcept { - error_code code = parser.parse(s.data(), s.length(), realloc_if_needed).error(); - // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid - // bits in the parser instead of heeding the result code. The normal parser unsets those in - // anticipation of making the error code ephemeral. - // Here we put the code back into the parser, until we've removed this method. - parser.valid = code == SUCCESS; - parser.error = code; - return code; -} -[[deprecated("Use parser.parse() instead")]] -inline int json_parse(const padded_string &s, dom::parser &parser) noexcept { - error_code code = parser.parse(s).error(); - // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid - // bits in the parser instead of heeding the result code. The normal parser unsets those in - // anticipation of making the error code ephemeral. - // Here we put the code back into the parser, until we've removed this method. - parser.valid = code == SUCCESS; - parser.error = code; - return code; -} - -[[deprecated("Use parser.parse() instead")]] -simdjson_warn_unused inline dom::parser build_parsed_json(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept { - dom::parser parser; - error_code code = parser.parse(buf, len, realloc_if_needed).error(); - // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid - // bits in the parser instead of heeding the result code. The normal parser unsets those in - // anticipation of making the error code ephemeral. - // Here we put the code back into the parser, until we've removed this method. - parser.valid = code == SUCCESS; - parser.error = code; - return parser; -} -[[deprecated("Use parser.parse() instead")]] -simdjson_warn_unused inline dom::parser build_parsed_json(const char *buf, size_t len, bool realloc_if_needed = true) noexcept { - dom::parser parser; - error_code code = parser.parse(buf, len, realloc_if_needed).error(); - // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid - // bits in the parser instead of heeding the result code. The normal parser unsets those in - // anticipation of making the error code ephemeral. - // Here we put the code back into the parser, until we've removed this method. - parser.valid = code == SUCCESS; - parser.error = code; - return parser; -} -[[deprecated("Use parser.parse() instead")]] -simdjson_warn_unused inline dom::parser build_parsed_json(const std::string &s, bool realloc_if_needed = true) noexcept { - dom::parser parser; - error_code code = parser.parse(s.data(), s.length(), realloc_if_needed).error(); - // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid - // bits in the parser instead of heeding the result code. The normal parser unsets those in - // anticipation of making the error code ephemeral. - // Here we put the code back into the parser, until we've removed this method. - parser.valid = code == SUCCESS; - parser.error = code; - return parser; +template +simdjson_inline void base_formatter::number(int64_t x) { + char number_buffer[24]; + char *newp = fast_itoa(number_buffer, x); + buffer.insert(buffer.end(), number_buffer, newp); } -[[deprecated("Use parser.parse() instead")]] -simdjson_warn_unused inline dom::parser build_parsed_json(const padded_string &s) noexcept { - dom::parser parser; - error_code code = parser.parse(s).error(); - // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid - // bits in the parser instead of heeding the result code. The normal parser unsets those in - // anticipation of making the error code ephemeral. - // Here we put the code back into the parser, until we've removed this method. - parser.valid = code == SUCCESS; - parser.error = code; - return parser; + +template +simdjson_inline void base_formatter::number(double x) { + char number_buffer[24]; + // Currently, passing the nullptr to the second argument is + // safe because our implementation does not check the second + // argument. + char *newp = internal::to_chars(number_buffer, nullptr, x); + buffer.insert(buffer.end(), number_buffer, newp); } -#endif // SIMDJSON_DISABLE_DEPRECATED_API -/** @private We do not want to allow implicit conversion from C string to std::string. */ -int json_parse(const char *buf, dom::parser &parser) noexcept = delete; -/** @private We do not want to allow implicit conversion from C string to std::string. */ -dom::parser build_parsed_json(const char *buf) noexcept = delete; +template +simdjson_inline void base_formatter::start_array() { one_char('['); } -} // namespace simdjson -#endif // SIMDJSON_DOM_JSONPARSER_H -/* end file simdjson/dom/jsonparser.h */ -/* including simdjson/dom/parsedjson.h: #include "simdjson/dom/parsedjson.h" */ -/* begin file simdjson/dom/parsedjson.h */ -// TODO Remove this -- deprecated API and files +template +simdjson_inline void base_formatter::end_array() { one_char(']'); } -#ifndef SIMDJSON_DOM_PARSEDJSON_H -#define SIMDJSON_DOM_PARSEDJSON_H +template +simdjson_inline void base_formatter::start_object() { one_char('{'); } -/* skipped duplicate #include "simdjson/dom/base.h" */ +template +simdjson_inline void base_formatter::end_object() { one_char('}'); } -namespace simdjson { +template +simdjson_inline void base_formatter::comma() { one_char(','); } -/** - * @deprecated Use `dom::parser` instead. - */ -using ParsedJson [[deprecated("Use dom::parser instead")]] = dom::parser; +template +simdjson_inline void base_formatter::true_atom() { + const char * s = "true"; + buffer.insert(buffer.end(), s, s + 4); +} -} // namespace simdjson +template +simdjson_inline void base_formatter::false_atom() { + const char * s = "false"; + buffer.insert(buffer.end(), s, s + 5); +} -#endif // SIMDJSON_DOM_PARSEDJSON_H -/* end file simdjson/dom/parsedjson.h */ -/* including simdjson/dom/parsedjson_iterator.h: #include "simdjson/dom/parsedjson_iterator.h" */ -/* begin file simdjson/dom/parsedjson_iterator.h */ -// TODO Remove this -- deprecated API and files +template +simdjson_inline void base_formatter::null_atom() { + const char * s = "null"; + buffer.insert(buffer.end(), s, s + 4); +} -#ifndef SIMDJSON_DOM_PARSEDJSON_ITERATOR_H -#define SIMDJSON_DOM_PARSEDJSON_ITERATOR_H +template +simdjson_inline void base_formatter::one_char(char c) { buffer.push_back(c); } -/* skipped duplicate #include "simdjson/dom/base.h" */ -/* skipped duplicate #include "simdjson/dom/parser.h" */ +template +simdjson_inline void base_formatter::key(std::string_view unescaped) { + string(unescaped); + one_char(':'); +} -#ifndef SIMDJSON_DISABLE_DEPRECATED_API +template +simdjson_inline void base_formatter::string(std::string_view unescaped) { + one_char('\"'); + size_t i = 0; + // Fast path for the case where we have no control character, no ", and no backslash. + // This should include most keys. + // + // We would like to use 'bool' but some compilers take offense to bitwise operation + // with bool types. + constexpr static char needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + for(;i + 8 <= unescaped.length(); i += 8) { + // Poor's man vectorization. This could get much faster if we used SIMD. + // + // It is not the case that replacing '|' with '||' would be neutral performance-wise. + if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])] + | needs_escaping[uint8_t(unescaped[i+2])] | needs_escaping[uint8_t(unescaped[i+3])] + | needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])] + | needs_escaping[uint8_t(unescaped[i+6])] | needs_escaping[uint8_t(unescaped[i+7])] + ) { break; } + } + for(;i < unescaped.length(); i++) { + if(needs_escaping[uint8_t(unescaped[i])]) { break; } + } + // The following is also possible and omits a 256-byte table, but it is slower: + // for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F) + // && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {} -namespace simdjson { -/** @private **/ -class [[deprecated("Use the new DOM navigation API instead (see doc/basics.md)")]] dom::parser::Iterator { -public: - inline Iterator(const dom::parser &parser) noexcept(false); - inline Iterator(const Iterator &o) noexcept; - inline ~Iterator() noexcept; + // At least for long strings, the following should be fast. We could + // do better by integrating the checks and the insertion. + buffer.insert(buffer.end(), unescaped.data(), unescaped.data() + i); + // We caught a control character if we enter this loop (slow). + // Note that we are do not restart from the beginning, but rather we continue + // from the point where we encountered something that requires escaping. + for (; i < unescaped.length(); i++) { + switch (unescaped[i]) { + case '\"': + { + const char * s = "\\\""; + buffer.insert(buffer.end(), s, s + 2); + } + break; + case '\\': + { + const char * s = "\\\\"; + buffer.insert(buffer.end(), s, s + 2); + } + break; + default: + if (uint8_t(unescaped[i]) <= 0x1F) { + // If packed, this uses 8 * 32 bytes. + // Note that we expect most compilers to embed this code in the data + // section. + constexpr static escape_sequence escaped[32] = { + {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"}, + {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"}, {6, "\\u0007"}, + {2, "\\b"}, {2, "\\t"}, {2, "\\n"}, {6, "\\u000b"}, + {2, "\\f"}, {2, "\\r"}, {6, "\\u000e"}, {6, "\\u000f"}, + {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"}, + {6, "\\u0014"}, {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"}, + {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"}, + {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}}; + auto u = escaped[uint8_t(unescaped[i])]; + buffer.insert(buffer.end(), u.string, u.string + u.length); + } else { + one_char(unescaped[i]); + } + } // switch + } // for + one_char('\"'); +} - inline Iterator& operator=(const Iterator&) = delete; - inline bool is_ok() const; - - // useful for debugging purposes - inline size_t get_tape_location() const; +template +inline void base_formatter::clear() { + buffer.clear(); +} - // useful for debugging purposes - inline size_t get_tape_length() const; - - // returns the current depth (start at 1 with 0 reserved for the fictitious - // root node) - inline size_t get_depth() const; - - // A scope is a series of nodes at the same depth, typically it is either an - // object ({) or an array ([). The root node has type 'r'. - inline uint8_t get_scope_type() const; - - // move forward in document order - inline bool move_forward(); - - // retrieve the character code of what we're looking at: - // [{"slutfn are the possibilities - inline uint8_t get_type() const { - return current_type; // short functions should be inlined! - } - - // get the int64_t value at this node; valid only if get_type is "l" - inline int64_t get_integer() const; - - // get the value as uint64; valid only if if get_type is "u" - inline uint64_t get_unsigned_integer() const; - - // get the string value at this node (NULL ended); valid only if get_type is " - // note that tabs, and line endings are escaped in the returned value (see - // print_with_escapes) return value is valid UTF-8, it may contain NULL chars - // within the string: get_string_length determines the true string length. - inline const char *get_string() const; - - // return the length of the string in bytes - inline uint32_t get_string_length() const; - - // get the double value at this node; valid only if - // get_type() is "d" - inline double get_double() const; - - inline bool is_object_or_array() const { return is_object() || is_array(); } - - inline bool is_object() const { return get_type() == '{'; } - - inline bool is_array() const { return get_type() == '['; } - - inline bool is_string() const { return get_type() == '"'; } - - // Returns true if the current type of the node is an signed integer. - // You can get its value with `get_integer()`. - inline bool is_integer() const { return get_type() == 'l'; } - - // Returns true if the current type of the node is an unsigned integer. - // You can get its value with `get_unsigned_integer()`. - // - // NOTE: - // Only a large value, which is out of range of a 64-bit signed integer, is - // represented internally as an unsigned node. On the other hand, a typical - // positive integer, such as 1, 42, or 1000000, is as a signed node. - // Be aware this function returns false for a signed node. - inline bool is_unsigned_integer() const { return get_type() == 'u'; } - // Returns true if the current type of the node is a double floating-point number. - inline bool is_double() const { return get_type() == 'd'; } - // Returns true if the current type of the node is a number (integer or floating-point). - inline bool is_number() const { - return is_integer() || is_unsigned_integer() || is_double(); - } - // Returns true if the current type of the node is a bool with true value. - inline bool is_true() const { return get_type() == 't'; } - // Returns true if the current type of the node is a bool with false value. - inline bool is_false() const { return get_type() == 'f'; } - // Returns true if the current type of the node is null. - inline bool is_null() const { return get_type() == 'n'; } - // Returns true if the type byte represents an object of an array - static bool is_object_or_array(uint8_t type) { - return ((type == '[') || (type == '{')); - } - - // when at {, go one level deep, looking for a given key - // if successful, we are left pointing at the value, - // if not, we are still pointing at the object ({) - // (in case of repeated keys, this only finds the first one). - // We seek the key using C's strcmp so if your JSON strings contain - // NULL chars, this would trigger a false positive: if you expect that - // to be the case, take extra precautions. - // Furthermore, we do the comparison character-by-character - // without taking into account Unicode equivalence. - inline bool move_to_key(const char *key); - - // as above, but case insensitive lookup (strcmpi instead of strcmp) - inline bool move_to_key_insensitive(const char *key); - - // when at {, go one level deep, looking for a given key - // if successful, we are left pointing at the value, - // if not, we are still pointing at the object ({) - // (in case of repeated keys, this only finds the first one). - // The string we search for can contain NULL values. - // Furthermore, we do the comparison character-by-character - // without taking into account Unicode equivalence. - inline bool move_to_key(const char *key, uint32_t length); - - // when at a key location within an object, this moves to the accompanying - // value (located next to it). This is equivalent but much faster than - // calling "next()". - inline void move_to_value(); - - // when at [, go one level deep, and advance to the given index. - // if successful, we are left pointing at the value, - // if not, we are still pointing at the array ([) - inline bool move_to_index(uint32_t index); - - // Moves the iterator to the value corresponding to the json pointer. - // Always search from the root of the document. - // if successful, we are left pointing at the value, - // if not, we are still pointing the same value we were pointing before the - // call. The json pointer follows the rfc6901 standard's syntax: - // https://tools.ietf.org/html/rfc6901 However, the standard says "If a - // referenced member name is not unique in an object, the member that is - // referenced is undefined, and evaluation fails". Here we just return the - // first corresponding value. The length parameter is the length of the - // jsonpointer string ('pointer'). - inline bool move_to(const char *pointer, uint32_t length); - - // Moves the iterator to the value corresponding to the json pointer. - // Always search from the root of the document. - // if successful, we are left pointing at the value, - // if not, we are still pointing the same value we were pointing before the - // call. The json pointer implementation follows the rfc6901 standard's - // syntax: https://tools.ietf.org/html/rfc6901 However, the standard says - // "If a referenced member name is not unique in an object, the member that - // is referenced is undefined, and evaluation fails". Here we just return - // the first corresponding value. - inline bool move_to(const std::string &pointer); +template +simdjson_inline std::string_view base_formatter::str() const { + return std::string_view(buffer.data(), buffer.size()); +} - private: - // Almost the same as move_to(), except it searches from the current - // position. The pointer's syntax is identical, though that case is not - // handled by the rfc6901 standard. The '/' is still required at the - // beginning. However, contrary to move_to(), the URI Fragment Identifier - // Representation is not supported here. Also, in case of failure, we are - // left pointing at the closest value it could reach. For these reasons it - // is private. It exists because it is used by move_to(). - inline bool relative_move_to(const char *pointer, uint32_t length); +simdjson_inline void mini_formatter::print_newline() { + return; +} - public: - // throughout return true if we can do the navigation, false - // otherwise +simdjson_inline void mini_formatter::print_indents(size_t depth) { + (void)depth; + return; +} - // Within a given scope (series of nodes at the same depth within either an - // array or an object), we move forward. - // Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, { - // and [. At the object ({) or at the array ([), you can issue a "down" to - // visit their content. valid if we're not at the end of a scope (returns - // true). - inline bool next(); +simdjson_inline void mini_formatter::print_space() { + return; +} - // Within a given scope (series of nodes at the same depth within either an - // array or an object), we move backward. - // Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true - // when starting at the end of the scope. At the object ({) or at the array - // ([), you can issue a "down" to visit their content. - // Performance warning: This function is implemented by starting again - // from the beginning of the scope and scanning forward. You should expect - // it to be relatively slow. - inline bool prev(); +simdjson_inline void pretty_formatter::print_newline() { + one_char('\n'); +} - // Moves back to either the containing array or object (type { or [) from - // within a contained scope. - // Valid unless we are at the first level of the document - inline bool up(); +simdjson_inline void pretty_formatter::print_indents(size_t depth) { + if(this->indent_step <= 0) { + return; + } + for(size_t i = 0; i < this->indent_step * depth; i++) { + one_char(' '); + } +} - // Valid if we're at a [ or { and it starts a non-empty scope; moves us to - // start of that deeper scope if it not empty. Thus, given [true, null, - // {"a":1}, [1,2]], if we are at the { node, we would move to the "a" node. - inline bool down(); +simdjson_inline void pretty_formatter::print_space() { + one_char(' '); +} - // move us to the start of our current scope, - // a scope is a series of nodes at the same level - inline void to_start_scope(); +/*** + * String building code. + **/ - inline void rewind(); +template +inline void string_builder::append(simdjson::dom::element value) { + // using tape_type = simdjson::internal::tape_type; + size_t depth = 0; + constexpr size_t MAX_DEPTH = 16; + bool is_object[MAX_DEPTH]; + is_object[0] = false; + bool after_value = false; + internal::tape_ref iter(value.tape); + do { + // print commas after each value + if (after_value) { + format.comma(); + format.print_newline(); + } + format.print_indents(depth); - // print the node we are currently pointing at - inline bool print(std::ostream &os, bool escape_strings = true) const; + // If we are in an object, print the next key and :, and skip to the next + // value. + if (is_object[depth]) { + format.key(iter.get_string_view()); + format.print_space(); + iter.json_index++; + } + switch (iter.tape_ref_type()) { - private: - const document &doc; - size_t max_depth{}; - size_t depth{}; - size_t location{}; // our current location on a tape - size_t tape_length{}; - uint8_t current_type{}; - uint64_t current_val{}; - typedef struct { - size_t start_of_scope; - uint8_t scope_type; - } scopeindex_t; - - scopeindex_t *depth_index{}; -}; + // Arrays + case tape_type::START_ARRAY: { + // If we're too deep, we need to recurse to go deeper. + depth++; + if (simdjson_unlikely(depth >= MAX_DEPTH)) { + append(simdjson::dom::array(iter)); + iter.json_index = iter.matching_brace_index() - 1; // Jump to the ] + depth--; + break; + } -} // namespace simdjson -#endif // SIMDJSON_DISABLE_DEPRECATED_API + // Output start [ + format.start_array(); + iter.json_index++; -#endif // SIMDJSON_DOM_PARSEDJSON_ITERATOR_H -/* end file simdjson/dom/parsedjson_iterator.h */ + // Handle empty [] (we don't want to come back around and print commas) + if (iter.tape_ref_type() == tape_type::END_ARRAY) { + format.end_array(); + depth--; + break; + } -// Inline functions -/* including simdjson/dom/array-inl.h: #include "simdjson/dom/array-inl.h" */ -/* begin file simdjson/dom/array-inl.h */ -#ifndef SIMDJSON_ARRAY_INL_H -#define SIMDJSON_ARRAY_INL_H + is_object[depth] = false; + after_value = false; + format.print_newline(); + continue; + } -#include + // Objects + case tape_type::START_OBJECT: { + // If we're too deep, we need to recurse to go deeper. + depth++; + if (simdjson_unlikely(depth >= MAX_DEPTH)) { + append(simdjson::dom::object(iter)); + iter.json_index = iter.matching_brace_index() - 1; // Jump to the } + depth--; + break; + } -/* skipped duplicate #include "simdjson/dom/base.h" */ -/* skipped duplicate #include "simdjson/dom/array.h" */ -/* skipped duplicate #include "simdjson/dom/element.h" */ -/* skipped duplicate #include "simdjson/error-inl.h" */ -/* including simdjson/internal/tape_ref-inl.h: #include "simdjson/internal/tape_ref-inl.h" */ -/* begin file simdjson/internal/tape_ref-inl.h */ -#ifndef SIMDJSON_TAPE_REF_INL_H -#define SIMDJSON_TAPE_REF_INL_H + // Output start { + format.start_object(); + iter.json_index++; -/* skipped duplicate #include "simdjson/dom/document.h" */ -/* skipped duplicate #include "simdjson/internal/tape_ref.h" */ -/* skipped duplicate #include "simdjson/internal/tape_type.h" */ + // Handle empty {} (we don't want to come back around and print commas) + if (iter.tape_ref_type() == tape_type::END_OBJECT) { + format.end_object(); + depth--; + break; + } -#include + is_object[depth] = true; + after_value = false; + format.print_newline(); + continue; + } -namespace simdjson { -namespace internal { + // Scalars + case tape_type::STRING: + format.string(iter.get_string_view()); + break; + case tape_type::INT64: + format.number(iter.next_tape_value()); + iter.json_index++; // numbers take up 2 spots, so we need to increment + // extra + break; + case tape_type::UINT64: + format.number(iter.next_tape_value()); + iter.json_index++; // numbers take up 2 spots, so we need to increment + // extra + break; + case tape_type::DOUBLE: + format.number(iter.next_tape_value()); + iter.json_index++; // numbers take up 2 spots, so we need to increment + // extra + break; + case tape_type::TRUE_VALUE: + format.true_atom(); + break; + case tape_type::FALSE_VALUE: + format.false_atom(); + break; + case tape_type::NULL_VALUE: + format.null_atom(); + break; -constexpr const uint64_t JSON_VALUE_MASK = 0x00FFFFFFFFFFFFFF; -constexpr const uint32_t JSON_COUNT_MASK = 0xFFFFFF; + // These are impossible + case tape_type::END_ARRAY: + case tape_type::END_OBJECT: + case tape_type::ROOT: + SIMDJSON_UNREACHABLE(); + } + iter.json_index++; + after_value = true; -// -// tape_ref inline implementation -// -simdjson_inline tape_ref::tape_ref() noexcept : doc{nullptr}, json_index{0} {} -simdjson_inline tape_ref::tape_ref(const dom::document *_doc, size_t _json_index) noexcept : doc{_doc}, json_index{_json_index} {} + // Handle multiple ends in a row + while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY || + iter.tape_ref_type() == tape_type::END_OBJECT)) { + format.print_newline(); + depth--; + format.print_indents(depth); + if (iter.tape_ref_type() == tape_type::END_ARRAY) { + format.end_array(); + } else { + format.end_object(); + } + iter.json_index++; + } + // Stop when we're at depth 0 + } while (depth != 0); -simdjson_inline bool tape_ref::is_document_root() const noexcept { - return json_index == 1; // should we ever change the structure of the tape, this should get updated. -} -simdjson_inline bool tape_ref::usable() const noexcept { - return doc != nullptr; // when the document pointer is null, this tape_ref is uninitialized (should not be accessed). + format.print_newline(); } -// Some value types have a specific on-tape word value. It can be faster -// to check the type by doing a word-to-word comparison instead of extracting the -// most significant 8 bits. -simdjson_inline bool tape_ref::is_double() const noexcept { - constexpr uint64_t tape_double = uint64_t(tape_type::DOUBLE)<<56; - return doc->tape[json_index] == tape_double; -} -simdjson_inline bool tape_ref::is_int64() const noexcept { - constexpr uint64_t tape_int64 = uint64_t(tape_type::INT64)<<56; - return doc->tape[json_index] == tape_int64; -} -simdjson_inline bool tape_ref::is_uint64() const noexcept { - constexpr uint64_t tape_uint64 = uint64_t(tape_type::UINT64)<<56; - return doc->tape[json_index] == tape_uint64; -} -simdjson_inline bool tape_ref::is_false() const noexcept { - constexpr uint64_t tape_false = uint64_t(tape_type::FALSE_VALUE)<<56; - return doc->tape[json_index] == tape_false; -} -simdjson_inline bool tape_ref::is_true() const noexcept { - constexpr uint64_t tape_true = uint64_t(tape_type::TRUE_VALUE)<<56; - return doc->tape[json_index] == tape_true; -} -simdjson_inline bool tape_ref::is_null_on_tape() const noexcept { - constexpr uint64_t tape_null = uint64_t(tape_type::NULL_VALUE)<<56; - return doc->tape[json_index] == tape_null; +template +inline void string_builder::append(simdjson::dom::object value) { + format.start_object(); + auto pair = value.begin(); + auto end = value.end(); + if (pair != end) { + append(*pair); + for (++pair; pair != end; ++pair) { + format.comma(); + append(*pair); + } + } + format.end_object(); } -inline size_t tape_ref::after_element() const noexcept { - switch (tape_ref_type()) { - case tape_type::START_ARRAY: - case tape_type::START_OBJECT: - return matching_brace_index(); - case tape_type::UINT64: - case tape_type::INT64: - case tape_type::DOUBLE: - return json_index + 2; - default: - return json_index + 1; +template +inline void string_builder::append(simdjson::dom::array value) { + format.start_array(); + auto iter = value.begin(); + auto end = value.end(); + if (iter != end) { + append(*iter); + for (++iter; iter != end; ++iter) { + format.comma(); + append(*iter); + } } -} -simdjson_inline tape_type tape_ref::tape_ref_type() const noexcept { - return static_cast(doc->tape[json_index] >> 56); -} -simdjson_inline uint64_t internal::tape_ref::tape_value() const noexcept { - return doc->tape[json_index] & internal::JSON_VALUE_MASK; -} -simdjson_inline uint32_t internal::tape_ref::matching_brace_index() const noexcept { - return uint32_t(doc->tape[json_index]); -} -simdjson_inline uint32_t internal::tape_ref::scope_count() const noexcept { - return uint32_t((doc->tape[json_index] >> 32) & internal::JSON_COUNT_MASK); + format.end_array(); } -template -simdjson_inline T tape_ref::next_tape_value() const noexcept { - static_assert(sizeof(T) == sizeof(uint64_t), "next_tape_value() template parameter must be 64-bit"); - // Though the following is tempting... - // return *reinterpret_cast(&doc->tape[json_index + 1]); - // It is not generally safe. It is safer, and often faster to rely - // on memcpy. Yes, it is uglier, but it is also encapsulated. - T x; - std::memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t)); - return x; +template +simdjson_inline void string_builder::append(simdjson::dom::key_value_pair kv) { + format.key(kv.key); + append(kv.value); } -simdjson_inline uint32_t internal::tape_ref::get_string_length() const noexcept { - size_t string_buf_index = size_t(tape_value()); - uint32_t len; - std::memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); - return len; +template +simdjson_inline void string_builder::clear() { + format.clear(); } -simdjson_inline const char * internal::tape_ref::get_c_str() const noexcept { - size_t string_buf_index = size_t(tape_value()); - return reinterpret_cast(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); +template +simdjson_inline std::string_view string_builder::str() const { + return format.str(); } -inline std::string_view internal::tape_ref::get_string_view() const noexcept { - return std::string_view( - get_c_str(), - get_string_length() - ); -} } // namespace internal } // namespace simdjson -#endif // SIMDJSON_TAPE_REF_INL_H -/* end file simdjson/internal/tape_ref-inl.h */ - -#include +#endif +/* end file simdjson/dom/serialization-inl.h */ -namespace simdjson { +#endif // SIMDJSON_DOM_H +/* end file simdjson/dom.h */ +/* including simdjson/ondemand.h: #include "simdjson/ondemand.h" */ +/* begin file simdjson/ondemand.h */ +#ifndef SIMDJSON_ONDEMAND_H +#define SIMDJSON_ONDEMAND_H -// -// simdjson_result inline implementation -// -simdjson_inline simdjson_result::simdjson_result() noexcept - : internal::simdjson_result_base() {} -simdjson_inline simdjson_result::simdjson_result(dom::array value) noexcept - : internal::simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : internal::simdjson_result_base(error) {} +/* including simdjson/builtin/ondemand.h: #include "simdjson/builtin/ondemand.h" */ +/* begin file simdjson/builtin/ondemand.h */ +#ifndef SIMDJSON_BUILTIN_ONDEMAND_H +#define SIMDJSON_BUILTIN_ONDEMAND_H -#if SIMDJSON_EXCEPTIONS +/* including simdjson/builtin.h: #include "simdjson/builtin.h" */ +/* begin file simdjson/builtin.h */ +#ifndef SIMDJSON_BUILTIN_H +#define SIMDJSON_BUILTIN_H -inline dom::array::iterator simdjson_result::begin() const noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first.begin(); -} -inline dom::array::iterator simdjson_result::end() const noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first.end(); -} -inline size_t simdjson_result::size() const noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first.size(); -} +/* including simdjson/builtin/base.h: #include "simdjson/builtin/base.h" */ +/* begin file simdjson/builtin/base.h */ +#ifndef SIMDJSON_BUILTIN_BASE_H +#define SIMDJSON_BUILTIN_BASE_H -#endif // SIMDJSON_EXCEPTIONS +/* skipped duplicate #include "simdjson/base.h" */ +/* including simdjson/implementation_detection.h: #include "simdjson/implementation_detection.h" */ +/* begin file simdjson/implementation_detection.h */ +#ifndef SIMDJSON_IMPLEMENTATION_DETECTION_H +#define SIMDJSON_IMPLEMENTATION_DETECTION_H -inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) const noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); -} -inline simdjson_result simdjson_result::at(size_t index) const noexcept { - if (error()) { return error(); } - return first.at(index); -} +/* skipped duplicate #include "simdjson/base.h" */ -namespace dom { +// 0 is reserved, because undefined SIMDJSON_IMPLEMENTATION equals 0 in preprocessor macros. +#define SIMDJSON_IMPLEMENTATION_ID_arm64 1 +#define SIMDJSON_IMPLEMENTATION_ID_fallback 2 +#define SIMDJSON_IMPLEMENTATION_ID_haswell 3 +#define SIMDJSON_IMPLEMENTATION_ID_icelake 4 +#define SIMDJSON_IMPLEMENTATION_ID_ppc64 5 +#define SIMDJSON_IMPLEMENTATION_ID_westmere 6 +#define SIMDJSON_IMPLEMENTATION_ID_lsx 7 +#define SIMDJSON_IMPLEMENTATION_ID_lasx 8 + +#define SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) SIMDJSON_CAT(SIMDJSON_IMPLEMENTATION_ID_, IMPL) +#define SIMDJSON_IMPLEMENTATION_ID SIMDJSON_IMPLEMENTATION_ID_FOR(SIMDJSON_IMPLEMENTATION) + +#define SIMDJSON_IMPLEMENTATION_IS(IMPL) SIMDJSON_IMPLEMENTATION_ID == SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) // -// array inline implementation +// First, figure out which implementations can be run. Doing it here makes it so we don't have to worry about the order +// in which we include them. // -simdjson_inline array::array() noexcept : tape{} {} -simdjson_inline array::array(const internal::tape_ref &_tape) noexcept : tape{_tape} {} -inline array::iterator array::begin() const noexcept { - SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 - return internal::tape_ref(tape.doc, tape.json_index + 1); -} -inline array::iterator array::end() const noexcept { - SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 - return internal::tape_ref(tape.doc, tape.after_element() - 1); -} -inline size_t array::size() const noexcept { - SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 - return tape.scope_count(); -} -inline size_t array::number_of_slots() const noexcept { - SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 - return tape.matching_brace_index() - tape.json_index; -} -inline simdjson_result array::at_pointer(std::string_view json_pointer) const noexcept { - SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 - if(json_pointer.empty()) { // an empty string means that we return the current node - return element(this->tape); // copy the current node - } else if(json_pointer[0] != '/') { // otherwise there is an error - return INVALID_JSON_POINTER; - } - json_pointer = json_pointer.substr(1); - // - means "the append position" or "the element after the end of the array" - // We don't support this, because we're returning a real element, not a position. - if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } - // Read the array index - size_t array_index = 0; - size_t i; - for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { - uint8_t digit = uint8_t(json_pointer[i] - '0'); - // Check for non-digit in array index. If it's there, we're trying to get a field in an object - if (digit > 9) { return INCORRECT_TYPE; } - array_index = array_index*10 + digit; - } +#ifndef SIMDJSON_IMPLEMENTATION_ARM64 +#define SIMDJSON_IMPLEMENTATION_ARM64 (SIMDJSON_IS_ARM64) +#endif +#if SIMDJSON_IMPLEMENTATION_ARM64 && SIMDJSON_IS_ARM64 +#define SIMDJSON_CAN_ALWAYS_RUN_ARM64 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_ARM64 0 +#endif - // 0 followed by other digits is invalid - if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" +// Default Icelake to on if this is x86-64. Even if we're not compiled for it, it could be selected +// at runtime. +#ifndef SIMDJSON_IMPLEMENTATION_ICELAKE +#define SIMDJSON_IMPLEMENTATION_ICELAKE ((SIMDJSON_IS_X86_64) && (SIMDJSON_AVX512_ALLOWED) && (SIMDJSON_COMPILER_SUPPORTS_VBMI2)) +#endif - // Empty string is invalid; so is a "/" with no digits before it - if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" +#ifdef _MSC_VER +// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdjson/simdjson/issues/1247 +#if ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 0 +#endif - // Get the child - auto child = array(tape).at(array_index); - // If there is an error, it ends here - if(child.error()) { - return child; - } - // If there is a /, we're not done yet, call recursively. - if (i < json_pointer.length()) { - child = child.at_pointer(json_pointer.substr(i)); - } - return child; -} +#else -inline simdjson_result array::at(size_t index) const noexcept { - SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 - size_t i=0; - for (auto element : *this) { - if (i == index) { return element; } - i++; - } - return INDEX_OUT_OF_BOUNDS; -} +#if ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE 0 +#endif -// -// array::iterator inline implementation -// -simdjson_inline array::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } -inline element array::iterator::operator*() const noexcept { - return element(tape); -} -inline array::iterator& array::iterator::operator++() noexcept { - tape.json_index = tape.after_element(); - return *this; -} -inline array::iterator array::iterator::operator++(int) noexcept { - array::iterator out = *this; - ++*this; - return out; -} -inline bool array::iterator::operator!=(const array::iterator& other) const noexcept { - return tape.json_index != other.tape.json_index; -} -inline bool array::iterator::operator==(const array::iterator& other) const noexcept { - return tape.json_index == other.tape.json_index; -} -inline bool array::iterator::operator<(const array::iterator& other) const noexcept { - return tape.json_index < other.tape.json_index; -} -inline bool array::iterator::operator<=(const array::iterator& other) const noexcept { - return tape.json_index <= other.tape.json_index; -} -inline bool array::iterator::operator>=(const array::iterator& other) const noexcept { - return tape.json_index >= other.tape.json_index; -} -inline bool array::iterator::operator>(const array::iterator& other) const noexcept { - return tape.json_index > other.tape.json_index; -} +#endif -} // namespace dom +// Default Haswell to on if this is x86-64. Even if we're not compiled for it, it could be selected +// at runtime. +#ifndef SIMDJSON_IMPLEMENTATION_HASWELL +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +// if icelake is always available, never enable haswell. +#define SIMDJSON_IMPLEMENTATION_HASWELL 0 +#else +#define SIMDJSON_IMPLEMENTATION_HASWELL SIMDJSON_IS_X86_64 +#endif +#endif +#ifdef _MSC_VER +// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdjson/simdjson/issues/1247 +#if ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__)) +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 0 +#endif +#else -} // namespace simdjson +#if ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__)) +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL 0 +#endif -/* skipped duplicate #include "simdjson/dom/element-inl.h" */ +#endif -#if defined(__cpp_lib_ranges) -static_assert(std::ranges::view); -static_assert(std::ranges::sized_range); -#if SIMDJSON_EXCEPTIONS -static_assert(std::ranges::view>); -static_assert(std::ranges::sized_range>); -#endif // SIMDJSON_EXCEPTIONS -#endif // defined(__cpp_lib_ranges) +// Default Westmere to on if this is x86-64. +#ifndef SIMDJSON_IMPLEMENTATION_WESTMERE +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL +// if icelake or haswell are always available, never enable westmere. +#define SIMDJSON_IMPLEMENTATION_WESTMERE 0 +#else +#define SIMDJSON_IMPLEMENTATION_WESTMERE SIMDJSON_IS_X86_64 +#endif +#endif -#endif // SIMDJSON_ARRAY_INL_H -/* end file simdjson/dom/array-inl.h */ -/* skipped duplicate #include "simdjson/dom/document_stream-inl.h" */ -/* including simdjson/dom/document-inl.h: #include "simdjson/dom/document-inl.h" */ -/* begin file simdjson/dom/document-inl.h */ -#ifndef SIMDJSON_DOCUMENT_INL_H -#define SIMDJSON_DOCUMENT_INL_H +#if (SIMDJSON_IMPLEMENTATION_WESTMERE && SIMDJSON_IS_X86_64 && __SSE4_2__ && __PCLMUL__) +#define SIMDJSON_CAN_ALWAYS_RUN_WESTMERE 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_WESTMERE 0 +#endif -// Inline implementations go in here. -/* skipped duplicate #include "simdjson/dom/base.h" */ -/* skipped duplicate #include "simdjson/dom/document.h" */ -/* skipped duplicate #include "simdjson/dom/element-inl.h" */ -/* skipped duplicate #include "simdjson/internal/tape_ref-inl.h" */ -/* including simdjson/internal/jsonformatutils.h: #include "simdjson/internal/jsonformatutils.h" */ -/* begin file simdjson/internal/jsonformatutils.h */ -#ifndef SIMDJSON_INTERNAL_JSONFORMATUTILS_H -#define SIMDJSON_INTERNAL_JSONFORMATUTILS_H +#ifndef SIMDJSON_IMPLEMENTATION_PPC64 +#define SIMDJSON_IMPLEMENTATION_PPC64 (SIMDJSON_IS_PPC64 && SIMDJSON_IS_PPC64_VMX) +#endif +#if SIMDJSON_IMPLEMENTATION_PPC64 && SIMDJSON_IS_PPC64 && SIMDJSON_IS_PPC64_VMX +#define SIMDJSON_CAN_ALWAYS_RUN_PPC64 1 +#else +#define SIMDJSON_CAN_ALWAYS_RUN_PPC64 0 +#endif + +#ifndef SIMDJSON_IMPLEMENTATION_LASX +#define SIMDJSON_IMPLEMENTATION_LASX (SIMDJSON_IS_LOONGARCH64 && __loongarch_asx) +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_LASX (SIMDJSON_IMPLEMENTATION_LASX) + +#ifndef SIMDJSON_IMPLEMENTATION_LSX +#if SIMDJSON_CAN_ALWAYS_RUN_LASX +#define SIMDJSON_IMPLEMENTATION_LSX 0 +#else +#define SIMDJSON_IMPLEMENTATION_LSX (SIMDJSON_IS_LOONGARCH64 && __loongarch_sx) +#endif +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_LSX (SIMDJSON_IMPLEMENTATION_LSX) + +// Default Fallback to on unless a builtin implementation has already been selected. +#ifndef SIMDJSON_IMPLEMENTATION_FALLBACK +#if SIMDJSON_CAN_ALWAYS_RUN_ARM64 || SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL || SIMDJSON_CAN_ALWAYS_RUN_WESTMERE || SIMDJSON_CAN_ALWAYS_RUN_PPC64 || SIMDJSON_CAN_ALWAYS_RUN_LSX || SIMDJSON_CAN_ALWAYS_RUN_LASX +// if anything at all except fallback can always run, then disable fallback. +#define SIMDJSON_IMPLEMENTATION_FALLBACK 0 +#else +#define SIMDJSON_IMPLEMENTATION_FALLBACK 1 +#endif +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_FALLBACK SIMDJSON_IMPLEMENTATION_FALLBACK + +// Determine the best builtin implementation +#ifndef SIMDJSON_BUILTIN_IMPLEMENTATION + +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +#define SIMDJSON_BUILTIN_IMPLEMENTATION icelake +#elif SIMDJSON_CAN_ALWAYS_RUN_HASWELL +#define SIMDJSON_BUILTIN_IMPLEMENTATION haswell +#elif SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +#define SIMDJSON_BUILTIN_IMPLEMENTATION westmere +#elif SIMDJSON_CAN_ALWAYS_RUN_ARM64 +#define SIMDJSON_BUILTIN_IMPLEMENTATION arm64 +#elif SIMDJSON_CAN_ALWAYS_RUN_PPC64 +#define SIMDJSON_BUILTIN_IMPLEMENTATION ppc64 +#elif SIMDJSON_CAN_ALWAYS_RUN_LSX +#define SIMDJSON_BUILTIN_IMPLEMENTATION lsx +#elif SIMDJSON_CAN_ALWAYS_RUN_LASX +#define SIMDJSON_BUILTIN_IMPLEMENTATION lasx +#elif SIMDJSON_CAN_ALWAYS_RUN_FALLBACK +#define SIMDJSON_BUILTIN_IMPLEMENTATION fallback +#else +#error "All possible implementations (including fallback) have been disabled! simdjson will not run." +#endif + +#endif // SIMDJSON_BUILTIN_IMPLEMENTATION + +#define SIMDJSON_BUILTIN_IMPLEMENTATION_ID SIMDJSON_IMPLEMENTATION_ID_FOR(SIMDJSON_BUILTIN_IMPLEMENTATION) +#define SIMDJSON_BUILTIN_IMPLEMENTATION_IS(IMPL) SIMDJSON_BUILTIN_IMPLEMENTATION_ID == SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) + +#endif // SIMDJSON_IMPLEMENTATION_DETECTION_H +/* end file simdjson/implementation_detection.h */ + +namespace simdjson { +#if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) + namespace arm64 {} +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback) + namespace fallback {} +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) + namespace haswell {} +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) + namespace icelake {} +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(ppc64) + namespace ppc64 {} +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) + namespace westmere {} +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(lsx) + namespace lsx {} +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(lasx) + namespace lasx {} +#else +#error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION +#endif + + /** + * Represents the best statically linked simdjson implementation that can be used by the compiling + * program. + * + * Detects what options the program is compiled against, and picks the minimum implementation that + * will work on any computer that can run the program. For example, if you compile with g++ + * -march=westmere, it will pick the westmere implementation. The haswell implementation will + * still be available, and can be selected at runtime, but the builtin implementation (and any + * code that uses it) will use westmere. + */ + namespace builtin = SIMDJSON_BUILTIN_IMPLEMENTATION; +} // namespace simdjson + +#endif // SIMDJSON_BUILTIN_BASE_H +/* end file simdjson/builtin/base.h */ +/* including simdjson/builtin/implementation.h: #include "simdjson/builtin/implementation.h" */ +/* begin file simdjson/builtin/implementation.h */ +#ifndef SIMDJSON_BUILTIN_IMPLEMENTATION_H +#define SIMDJSON_BUILTIN_IMPLEMENTATION_H + +/* skipped duplicate #include "simdjson/builtin/base.h" */ + +/* including simdjson/generic/dependencies.h: #include "simdjson/generic/dependencies.h" */ +/* begin file simdjson/generic/dependencies.h */ +#ifdef SIMDJSON_CONDITIONAL_INCLUDE +#error simdjson/generic/dependencies.h must be included before defining SIMDJSON_CONDITIONAL_INCLUDE! +#endif + +#ifndef SIMDJSON_GENERIC_DEPENDENCIES_H +#define SIMDJSON_GENERIC_DEPENDENCIES_H +// Internal headers needed for generics. +// All includes referencing simdjson headers *not* under simdjson/generic must be here! +// Otherwise, amalgamation will fail. /* skipped duplicate #include "simdjson/base.h" */ -#include -#include -#include +/* skipped duplicate #include "simdjson/implementation.h" */ +/* skipped duplicate #include "simdjson/implementation_detection.h" */ +/* including simdjson/internal/instruction_set.h: #include "simdjson/internal/instruction_set.h" */ +/* begin file simdjson/internal/instruction_set.h */ +/* From +https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h +Highly modified. + +Copyright (c) 2016- Facebook, Inc (Adam Paszke) +Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +Copyright (c) 2011-2013 NYU (Clement Farabet) +Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, +Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute +(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, +Samy Bengio, Johnny Mariethoz) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories +America and IDIAP Research Institute nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SIMDJSON_INTERNAL_INSTRUCTION_SET_H +#define SIMDJSON_INTERNAL_INSTRUCTION_SET_H namespace simdjson { namespace internal { -inline std::ostream& operator<<(std::ostream& out, const escape_json_string &str); - -class escape_json_string { -public: - escape_json_string(std::string_view _str) noexcept : str{_str} {} - operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); } -private: - std::string_view str; - friend std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped); +enum instruction_set { + DEFAULT = 0x0, + NEON = 0x1, + AVX2 = 0x4, + SSE42 = 0x8, + PCLMULQDQ = 0x10, + BMI1 = 0x20, + BMI2 = 0x40, + ALTIVEC = 0x80, + AVX512F = 0x100, + AVX512DQ = 0x200, + AVX512IFMA = 0x400, + AVX512PF = 0x800, + AVX512ER = 0x1000, + AVX512CD = 0x2000, + AVX512BW = 0x4000, + AVX512VL = 0x8000, + AVX512VBMI2 = 0x10000, + LSX = 0x20000, + LASX = 0x40000, }; -inline std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped) { - for (size_t i=0; i(unescaped.str[i]) <= 0x1F) { - // TODO can this be done once at the beginning, or will it mess up << char? - std::ios::fmtflags f(out.flags()); - out << "\\u" << std::hex << std::setw(4) << std::setfill('0') << int(unescaped.str[i]); - out.flags(f); - } else { - out << unescaped.str[i]; - } - } - } - return out; -} - } // namespace internal } // namespace simdjson -#endif // SIMDJSON_INTERNAL_JSONFORMATUTILS_H -/* end file simdjson/internal/jsonformatutils.h */ +#endif // SIMDJSON_INTERNAL_INSTRUCTION_SET_H +/* end file simdjson/internal/instruction_set.h */ +/* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ +/* including simdjson/internal/jsoncharutils_tables.h: #include "simdjson/internal/jsoncharutils_tables.h" */ +/* begin file simdjson/internal/jsoncharutils_tables.h */ +#ifndef SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H +#define SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H -#include +/* skipped duplicate #include "simdjson/base.h" */ -namespace simdjson { -namespace dom { +#ifdef JSON_TEST_STRINGS +void found_string(const uint8_t *buf, const uint8_t *parsed_begin, + const uint8_t *parsed_end); +void found_bad_string(const uint8_t *buf); +#endif -// -// document inline implementation -// -inline element document::root() const noexcept { - return element(internal::tape_ref(this, 1)); -} -simdjson_warn_unused -inline size_t document::capacity() const noexcept { - return allocated_capacity; -} +namespace simdjson { +namespace internal { +// structural chars here are +// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL) +// we are also interested in the four whitespace characters +// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d -simdjson_warn_unused -inline error_code document::allocate(size_t capacity) noexcept { - if (capacity == 0) { - string_buf.reset(); - tape.reset(); - allocated_capacity = 0; - return SUCCESS; - } +extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace_negated[256]; +extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace[256]; +extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886]; - // a pathological input like "[[[[..." would generate capacity tape elements, so - // need a capacity of at least capacity + 1, but it is also possible to do - // worse with "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6" - //where capacity + 1 tape elements are - // generated, see issue https://github.com/simdjson/simdjson/issues/345 - size_t tape_capacity = SIMDJSON_ROUNDUP_N(capacity + 3, 64); - // a document with only zero-length strings... could have capacity/3 string - // and we would need capacity/3 * 5 bytes on the string buffer - size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * capacity / 3 + SIMDJSON_PADDING, 64); - string_buf.reset( new (std::nothrow) uint8_t[string_capacity]); - tape.reset(new (std::nothrow) uint64_t[tape_capacity]); - if(!(string_buf && tape)) { - allocated_capacity = 0; - string_buf.reset(); - tape.reset(); - return MEMALLOC; - } - // Technically the allocated_capacity might be larger than capacity - // so the next line is pessimistic. - allocated_capacity = capacity; - return SUCCESS; -} +} // namespace internal +} // namespace simdjson -inline bool document::dump_raw_tape(std::ostream &os) const noexcept { - uint32_t string_length; - size_t tape_idx = 0; - uint64_t tape_val = tape[tape_idx]; - uint8_t type = uint8_t(tape_val >> 56); - os << tape_idx << " : " << type; - tape_idx++; - size_t how_many = 0; - if (type == 'r') { - how_many = size_t(tape_val & internal::JSON_VALUE_MASK); - } else { - // Error: no starting root node? - return false; - } - os << "\t// pointing to " << how_many << " (right after last node)\n"; - uint64_t payload; - for (; tape_idx < how_many; tape_idx++) { - os << tape_idx << " : "; - tape_val = tape[tape_idx]; - payload = tape_val & internal::JSON_VALUE_MASK; - type = uint8_t(tape_val >> 56); - switch (type) { - case '"': // we have a string - os << "string \""; - std::memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t)); - os << internal::escape_json_string(std::string_view( - reinterpret_cast(string_buf.get() + payload + sizeof(uint32_t)), - string_length - )); - os << '"'; - os << '\n'; - break; - case 'l': // we have a long int - if (tape_idx + 1 >= how_many) { - return false; - } - os << "integer " << static_cast(tape[++tape_idx]) << "\n"; - break; - case 'u': // we have a long uint - if (tape_idx + 1 >= how_many) { - return false; - } - os << "unsigned integer " << tape[++tape_idx] << "\n"; - break; - case 'd': // we have a double - os << "float "; - if (tape_idx + 1 >= how_many) { - return false; - } - double answer; - std::memcpy(&answer, &tape[++tape_idx], sizeof(answer)); - os << answer << '\n'; - break; - case 'n': // we have a null - os << "null\n"; - break; - case 't': // we have a true - os << "true\n"; - break; - case 'f': // we have a false - os << "false\n"; - break; - case '{': // we have an object - os << "{\t// pointing to next tape location " << uint32_t(payload) - << " (first node after the scope), " - << " saturated count " - << ((payload >> 32) & internal::JSON_COUNT_MASK)<< "\n"; - break; case '}': // we end an object - os << "}\t// pointing to previous tape location " << uint32_t(payload) - << " (start of the scope)\n"; - break; - case '[': // we start an array - os << "[\t// pointing to next tape location " << uint32_t(payload) - << " (first node after the scope), " - << " saturated count " - << ((payload >> 32) & internal::JSON_COUNT_MASK)<< "\n"; - break; - case ']': // we end an array - os << "]\t// pointing to previous tape location " << uint32_t(payload) - << " (start of the scope)\n"; - break; - case 'r': // we start and end with the root node - // should we be hitting the root node? - return false; - default: - return false; - } - } - tape_val = tape[tape_idx]; - payload = tape_val & internal::JSON_VALUE_MASK; - type = uint8_t(tape_val >> 56); - os << tape_idx << " : " << type << "\t// pointing to " << payload - << " (start root)\n"; - return true; -} +#endif // SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H +/* end file simdjson/internal/jsoncharutils_tables.h */ +/* including simdjson/internal/numberparsing_tables.h: #include "simdjson/internal/numberparsing_tables.h" */ +/* begin file simdjson/internal/numberparsing_tables.h */ +#ifndef SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H +#define SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H -} // namespace dom -} // namespace simdjson +/* skipped duplicate #include "simdjson/base.h" */ -#endif // SIMDJSON_DOCUMENT_INL_H -/* end file simdjson/dom/document-inl.h */ -/* skipped duplicate #include "simdjson/dom/element-inl.h" */ -/* skipped duplicate #include "simdjson/dom/object-inl.h" */ -/* including simdjson/dom/parsedjson_iterator-inl.h: #include "simdjson/dom/parsedjson_iterator-inl.h" */ -/* begin file simdjson/dom/parsedjson_iterator-inl.h */ -#ifndef SIMDJSON_PARSEDJSON_ITERATOR_INL_H -#define SIMDJSON_PARSEDJSON_ITERATOR_INL_H +namespace simdjson { +namespace internal { +/** + * The smallest non-zero float (binary64) is 2^-1074. + * We take as input numbers of the form w x 10^q where w < 2^64. + * We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. + * However, we have that + * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074. + * Thus it is possible for a number of the form w * 10^-342 where + * w is a 64-bit value to be a non-zero floating-point number. + ********* + * Any number of form w * 10^309 where w>= 1 is going to be + * infinite in binary64 so we never need to worry about powers + * of 5 greater than 308. + */ +constexpr int smallest_power = -342; +constexpr int largest_power = 308; -/* skipped duplicate #include "simdjson/dom/base.h" */ -/* skipped duplicate #include "simdjson/dom/parsedjson_iterator.h" */ -/* skipped duplicate #include "simdjson/internal/jsonformatutils.h" */ +/** + * Represents a 128-bit value. + * low: least significant 64 bits. + * high: most significant 64 bits. + */ +struct value128 { + uint64_t low; + uint64_t high; +}; -/* skipped duplicate #include "simdjson/dom/parser-inl.h" */ -/* skipped duplicate #include "simdjson/internal/tape_ref-inl.h" */ -#include -#include -#include -#include +// Precomputed powers of ten from 10^0 to 10^22. These +// can be represented exactly using the double type. +extern SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[]; -#ifndef SIMDJSON_DISABLE_DEPRECATED_API -namespace simdjson { +/** + * When mapping numbers from decimal to binary, + * we go from w * 10^q to m * 2^p but we have + * 10^q = 5^q * 2^q, so effectively + * we are trying to match + * w * 2^q * 5^q to m * 2^p. Thus the powers of two + * are not a concern since they can be represented + * exactly using the binary notation, only the powers of five + * affect the binary significand. + */ -// VS2017 reports deprecated warnings when you define a deprecated class's methods. -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_DEPRECATED_WARNING -// Because of template weirdness, the actual class definition is inline in the document class -simdjson_warn_unused bool dom::parser::Iterator::is_ok() const { - return location < tape_length; -} +// The truncated powers of five from 5^-342 all the way to 5^308 +// The mantissa is truncated to 128 bits, and +// never rounded up. Uses about 10KB. +extern SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[]; +} // namespace internal +} // namespace simdjson -// useful for debugging purposes -size_t dom::parser::Iterator::get_tape_location() const { - return location; -} +#endif // SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H +/* end file simdjson/internal/numberparsing_tables.h */ +/* including simdjson/internal/simdprune_tables.h: #include "simdjson/internal/simdprune_tables.h" */ +/* begin file simdjson/internal/simdprune_tables.h */ +#ifndef SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H +#define SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H -// useful for debugging purposes -size_t dom::parser::Iterator::get_tape_length() const { - return tape_length; -} +/* skipped duplicate #include "simdjson/base.h" */ -// returns the current depth (start at 1 with 0 reserved for the fictitious root -// node) -size_t dom::parser::Iterator::get_depth() const { - return depth; -} +#include -// A scope is a series of nodes at the same depth, typically it is either an -// object ({) or an array ([). The root node has type 'r'. -uint8_t dom::parser::Iterator::get_scope_type() const { - return depth_index[depth].scope_type; -} +namespace simdjson { // table modified and copied from +namespace internal { // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable -bool dom::parser::Iterator::move_forward() { - if (location + 1 >= tape_length) { - return false; // we are at the end! - } +extern SIMDJSON_DLLIMPORTEXPORT const unsigned char BitsSetTable256mul2[256]; - if ((current_type == '[') || (current_type == '{')) { - // We are entering a new scope - depth++; - assert(depth < max_depth); - depth_index[depth].start_of_scope = location; - depth_index[depth].scope_type = current_type; - } else if ((current_type == ']') || (current_type == '}')) { - // Leaving a scope. - depth--; - } else if (is_number()) { - // these types use 2 locations on the tape, not just one. - location += 1; - } +extern SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272]; - location += 1; - current_val = doc.tape[location]; - current_type = uint8_t(current_val >> 56); - return true; -} +// 256 * 8 bytes = 2kB, easily fits in cache. +extern SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256]; -void dom::parser::Iterator::move_to_value() { - // assume that we are on a key, so move by 1. - location += 1; - current_val = doc.tape[location]; - current_type = uint8_t(current_val >> 56); -} +} // namespace internal +} // namespace simdjson -bool dom::parser::Iterator::move_to_key(const char *key) { - if (down()) { - do { - const bool right_key = (strcmp(get_string(), key) == 0); - move_to_value(); - if (right_key) { - return true; - } - } while (next()); - up(); - } - return false; -} +#endif // SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H +/* end file simdjson/internal/simdprune_tables.h */ +#endif // SIMDJSON_GENERIC_DEPENDENCIES_H +/* end file simdjson/generic/dependencies.h */ -bool dom::parser::Iterator::move_to_key_insensitive( - const char *key) { - if (down()) { - do { - const bool right_key = (simdjson_strcasecmp(get_string(), key) == 0); - move_to_value(); - if (right_key) { - return true; - } - } while (next()); - up(); - } - return false; -} +/* defining SIMDJSON_CONDITIONAL_INCLUDE */ +#define SIMDJSON_CONDITIONAL_INCLUDE -bool dom::parser::Iterator::move_to_key(const char *key, - uint32_t length) { - if (down()) { - do { - bool right_key = ((get_string_length() == length) && - (memcmp(get_string(), key, length) == 0)); - move_to_value(); - if (right_key) { - return true; - } - } while (next()); - up(); - } - return false; -} +#if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) +/* including simdjson/arm64/implementation.h: #include "simdjson/arm64/implementation.h" */ +/* begin file simdjson/arm64/implementation.h */ +#ifndef SIMDJSON_ARM64_IMPLEMENTATION_H +#define SIMDJSON_ARM64_IMPLEMENTATION_H -bool dom::parser::Iterator::move_to_index(uint32_t index) { - if (down()) { - uint32_t i = 0; - for (; i < index; i++) { - if (!next()) { - break; - } - } - if (i == index) { - return true; - } - up(); - } - return false; -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -bool dom::parser::Iterator::prev() { - size_t target_location = location; - to_start_scope(); - size_t npos = location; - if (target_location == npos) { - return false; // we were already at the start - } - size_t oldnpos; - // we have that npos < target_location here - do { - oldnpos = npos; - if ((current_type == '[') || (current_type == '{')) { - // we need to jump - npos = uint32_t(current_val); - } else { - npos = npos + ((current_type == 'd' || current_type == 'l') ? 2 : 1); - } - } while (npos < target_location); - location = oldnpos; - current_val = doc.tape[location]; - current_type = uint8_t(current_val >> 56); - return true; -} +namespace simdjson { +namespace arm64 { -bool dom::parser::Iterator::up() { - if (depth == 1) { - return false; // don't allow moving back to root - } - to_start_scope(); - // next we just move to the previous value - depth--; - location -= 1; - current_val = doc.tape[location]; - current_type = uint8_t(current_val >> 56); - return true; -} +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; -bool dom::parser::Iterator::down() { - if (location + 1 >= tape_length) { - return false; - } - if ((current_type == '[') || (current_type == '{')) { - size_t npos = uint32_t(current_val); - if (npos == location + 2) { - return false; // we have an empty scope - } - depth++; - assert(depth < max_depth); - location = location + 1; - depth_index[depth].start_of_scope = location; - depth_index[depth].scope_type = current_type; - current_val = doc.tape[location]; - current_type = uint8_t(current_val >> 56); - return true; - } - return false; -} +} // namespace arm64 +} // namespace simdjson -void dom::parser::Iterator::to_start_scope() { - location = depth_index[depth].start_of_scope; - current_val = doc.tape[location]; - current_type = uint8_t(current_val >> 56); -} +#endif // SIMDJSON_ARM64_IMPLEMENTATION_H +/* end file simdjson/arm64/implementation.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback) +/* including simdjson/fallback/implementation.h: #include "simdjson/fallback/implementation.h" */ +/* begin file simdjson/fallback/implementation.h */ +#ifndef SIMDJSON_FALLBACK_IMPLEMENTATION_H +#define SIMDJSON_FALLBACK_IMPLEMENTATION_H -inline void dom::parser::Iterator::rewind() { - while (up()) - ; -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +namespace simdjson { +namespace fallback { -bool dom::parser::Iterator::next() { - size_t npos; - if ((current_type == '[') || (current_type == '{')) { - // we need to jump - npos = uint32_t(current_val); - } else { - npos = location + (is_number() ? 2 : 1); - } - uint64_t next_val = doc.tape[npos]; - uint8_t next_type = uint8_t(next_val >> 56); - if ((next_type == ']') || (next_type == '}')) { - return false; // we reached the end of the scope - } - location = npos; - current_val = next_val; - current_type = next_type; - return true; -} -dom::parser::Iterator::Iterator(const dom::parser &pj) noexcept(false) - : doc(pj.doc) -{ -#if SIMDJSON_EXCEPTIONS - if (!pj.valid) { throw simdjson_error(pj.error); } -#else - if (!pj.valid) { return; } // abort() usage is forbidden in the library -#endif - - max_depth = pj.max_depth(); - depth_index = new scopeindex_t[max_depth + 1]; - depth_index[0].start_of_scope = location; - current_val = doc.tape[location++]; - current_type = uint8_t(current_val >> 56); - depth_index[0].scope_type = current_type; - tape_length = size_t(current_val & internal::JSON_VALUE_MASK); - if (location < tape_length) { - // If we make it here, then depth_capacity must >=2, but the compiler - // may not know this. - current_val = doc.tape[location]; - current_type = uint8_t(current_val >> 56); - depth++; - assert(depth < max_depth); - depth_index[depth].start_of_scope = location; - depth_index[depth].scope_type = current_type; - } -} -dom::parser::Iterator::Iterator( - const dom::parser::Iterator &o) noexcept - : doc(o.doc), - max_depth(o.depth), - depth(o.depth), - location(o.location), - tape_length(o.tape_length), - current_type(o.current_type), - current_val(o.current_val) -{ - depth_index = new scopeindex_t[max_depth+1]; - std::memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0])); -} +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "fallback", + "Generic fallback implementation", + 0 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; -dom::parser::Iterator::~Iterator() noexcept { - if (depth_index) { delete[] depth_index; } -} +} // namespace fallback +} // namespace simdjson -bool dom::parser::Iterator::print(std::ostream &os, bool escape_strings) const { - if (!is_ok()) { - return false; - } - switch (current_type) { - case '"': // we have a string - os << '"'; - if (escape_strings) { - os << internal::escape_json_string(std::string_view(get_string(), get_string_length())); - } else { - // was: os << get_string();, but given that we can include null chars, we - // have to do something crazier: - std::copy(get_string(), get_string() + get_string_length(), std::ostream_iterator(os)); - } - os << '"'; - break; - case 'l': // we have a long int - os << get_integer(); - break; - case 'u': - os << get_unsigned_integer(); - break; - case 'd': - os << get_double(); - break; - case 'n': // we have a null - os << "null"; - break; - case 't': // we have a true - os << "true"; - break; - case 'f': // we have a false - os << "false"; - break; - case '{': // we have an object - case '}': // we end an object - case '[': // we start an array - case ']': // we end an array - os << char(current_type); - break; - default: - return false; - } - return true; -} +#endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H +/* end file simdjson/fallback/implementation.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) +/* including simdjson/haswell/implementation.h: #include "simdjson/haswell/implementation.h" */ +/* begin file simdjson/haswell/implementation.h */ +#ifndef SIMDJSON_HASWELL_IMPLEMENTATION_H +#define SIMDJSON_HASWELL_IMPLEMENTATION_H -bool dom::parser::Iterator::move_to(const char *pointer, - uint32_t length) { - char *new_pointer = nullptr; - if (pointer[0] == '#') { - // Converting fragment representation to string representation - new_pointer = new char[length]; - uint32_t new_length = 0; - for (uint32_t i = 1; i < length; i++) { - if (pointer[i] == '%' && pointer[i + 1] == 'x') { -#if __cpp_exceptions - try { -#endif - int fragment = - std::stoi(std::string(&pointer[i + 2], 2), nullptr, 16); - if (fragment == '\\' || fragment == '"' || (fragment <= 0x1F)) { - // escaping the character - new_pointer[new_length] = '\\'; - new_length++; - } - new_pointer[new_length] = char(fragment); - i += 3; -#if __cpp_exceptions - } catch (std::invalid_argument &) { - delete[] new_pointer; - return false; // the fragment is invalid - } -#endif - } else { - new_pointer[new_length] = pointer[i]; - } - new_length++; - } - length = new_length; - pointer = new_pointer; - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // saving the current state - size_t depth_s = depth; - size_t location_s = location; - uint8_t current_type_s = current_type; - uint64_t current_val_s = current_val; +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace simdjson { +namespace haswell { - rewind(); // The json pointer is used from the root of the document. +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "haswell", + "Intel/AMD AVX2", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; - bool found = relative_move_to(pointer, length); - delete[] new_pointer; +} // namespace haswell +} // namespace simdjson - if (!found) { - // since the pointer has found nothing, we get back to the original - // position. - depth = depth_s; - location = location_s; - current_type = current_type_s; - current_val = current_val_s; - } +#endif // SIMDJSON_HASWELL_IMPLEMENTATION_H +/* end file simdjson/haswell/implementation.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) +/* including simdjson/icelake/implementation.h: #include "simdjson/icelake/implementation.h" */ +/* begin file simdjson/icelake/implementation.h */ +#ifndef SIMDJSON_ICELAKE_IMPLEMENTATION_H +#define SIMDJSON_ICELAKE_IMPLEMENTATION_H - return found; -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -inline bool dom::parser::Iterator::move_to(const std::string &pointer) { - return move_to(pointer.c_str(), uint32_t(pointer.length())); -} +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +namespace icelake { -inline int64_t dom::parser::Iterator::get_integer() const { - if (location + 1 >= tape_length) { - return 0; // default value in case of error - } - return static_cast(doc.tape[location + 1]); -} +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "icelake", + "Intel/AMD AVX512", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512F | internal::instruction_set::AVX512DQ | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; -inline uint64_t dom::parser::Iterator::get_unsigned_integer() const { - if (location + 1 >= tape_length) { - return 0; // default value in case of error - } - return doc.tape[location + 1]; -} +} // namespace icelake +} // namespace simdjson -inline const char * dom::parser::Iterator::get_string() const { - return reinterpret_cast( - doc.string_buf.get() + (current_val & internal::JSON_VALUE_MASK) + sizeof(uint32_t)); -} +#endif // SIMDJSON_ICELAKE_IMPLEMENTATION_H +/* end file simdjson/icelake/implementation.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(ppc64) +/* including simdjson/ppc64/implementation.h: #include "simdjson/ppc64/implementation.h" */ +/* begin file simdjson/ppc64/implementation.h */ +#ifndef SIMDJSON_PPC64_IMPLEMENTATION_H +#define SIMDJSON_PPC64_IMPLEMENTATION_H -inline uint32_t dom::parser::Iterator::get_string_length() const { - uint32_t answer; - std::memcpy(&answer, - reinterpret_cast(doc.string_buf.get() + - (current_val & internal::JSON_VALUE_MASK)), - sizeof(uint32_t)); - return answer; -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -inline double dom::parser::Iterator::get_double() const { - if (location + 1 >= tape_length) { - return std::numeric_limits::quiet_NaN(); // default value in - // case of error - } - double answer; - std::memcpy(&answer, &doc.tape[location + 1], sizeof(answer)); - return answer; -} +namespace simdjson { -bool dom::parser::Iterator::relative_move_to(const char *pointer, - uint32_t length) { - if (length == 0) { - // returns the whole document - return true; - } +/** + * Implementation for ALTIVEC (PPC64). + */ +namespace ppc64 { - if (pointer[0] != '/') { - // '/' must be the first character - return false; - } +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() + : simdjson::implementation("ppc64", "PPC64 ALTIVEC", + internal::instruction_set::ALTIVEC) {} - // finding the key in an object or the index in an array - std::string key_or_index; - uint32_t offset = 1; + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, size_t max_length, + std::unique_ptr &dst) + const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, + uint8_t *dst, + size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +}; - // checking for the "-" case - if (is_array() && pointer[1] == '-') { - if (length != 2) { - // the pointer must be exactly "/-" - // there can't be anything more after '-' as an index - return false; - } - key_or_index = '-'; - offset = length; // will skip the loop coming right after - } +} // namespace ppc64 +} // namespace simdjson - // We either transform the first reference token to a valid json key - // or we make sure it is a valid index in an array. - for (; offset < length; offset++) { - if (pointer[offset] == '/') { - // beginning of the next key or index - break; - } - if (is_array() && (pointer[offset] < '0' || pointer[offset] > '9')) { - // the index of an array must be an integer - // we also make sure std::stoi won't discard whitespaces later - return false; - } - if (pointer[offset] == '~') { - // "~1" represents "/" - if (pointer[offset + 1] == '1') { - key_or_index += '/'; - offset++; - continue; - } - // "~0" represents "~" - if (pointer[offset + 1] == '0') { - key_or_index += '~'; - offset++; - continue; - } - } - if (pointer[offset] == '\\') { - if (pointer[offset + 1] == '\\' || pointer[offset + 1] == '"' || - (pointer[offset + 1] <= 0x1F)) { - key_or_index += pointer[offset + 1]; - offset++; - continue; - } - return false; // invalid escaped character - } - if (pointer[offset] == '\"') { - // unescaped quote character. this is an invalid case. - // lets do nothing and assume most pointers will be valid. - // it won't find any corresponding json key anyway. - // return false; - } - key_or_index += pointer[offset]; - } - - bool found = false; - if (is_object()) { - if (move_to_key(key_or_index.c_str(), uint32_t(key_or_index.length()))) { - found = relative_move_to(pointer + offset, length - offset); - } - } else if (is_array()) { - if (key_or_index == "-") { // handling "-" case first - if (down()) { - while (next()) - ; // moving to the end of the array - // moving to the nonexistent value right after... - size_t npos; - if ((current_type == '[') || (current_type == '{')) { - // we need to jump - npos = uint32_t(current_val); - } else { - npos = - location + ((current_type == 'd' || current_type == 'l') ? 2 : 1); - } - location = npos; - current_val = doc.tape[npos]; - current_type = uint8_t(current_val >> 56); - return true; // how could it fail ? - } - } else { // regular numeric index - // The index can't have a leading '0' - if (key_or_index[0] == '0' && key_or_index.length() > 1) { - return false; - } - // it cannot be empty - if (key_or_index.length() == 0) { - return false; - } - // we already checked the index contains only valid digits - uint32_t index = std::stoi(key_or_index); - if (move_to_index(index)) { - found = relative_move_to(pointer + offset, length - offset); - } - } - } +#endif // SIMDJSON_PPC64_IMPLEMENTATION_H +/* end file simdjson/ppc64/implementation.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) +/* including simdjson/westmere/implementation.h: #include "simdjson/westmere/implementation.h" */ +/* begin file simdjson/westmere/implementation.h */ +#ifndef SIMDJSON_WESTMERE_IMPLEMENTATION_H +#define SIMDJSON_WESTMERE_IMPLEMENTATION_H - return found; -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -SIMDJSON_POP_DISABLE_WARNINGS +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +namespace westmere { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace westmere } // namespace simdjson -#endif // SIMDJSON_DISABLE_DEPRECATED_API +#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H +/* end file simdjson/westmere/implementation.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(lsx) +/* including simdjson/lsx/implementation.h: #include "simdjson/lsx/implementation.h" */ +/* begin file simdjson/lsx/implementation.h */ +#ifndef SIMDJSON_LSX_IMPLEMENTATION_H +#define SIMDJSON_LSX_IMPLEMENTATION_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#endif // SIMDJSON_PARSEDJSON_ITERATOR_INL_H -/* end file simdjson/dom/parsedjson_iterator-inl.h */ -/* skipped duplicate #include "simdjson/dom/parser-inl.h" */ -/* skipped duplicate #include "simdjson/internal/tape_ref-inl.h" */ -/* including simdjson/dom/serialization-inl.h: #include "simdjson/dom/serialization-inl.h" */ -/* begin file simdjson/dom/serialization-inl.h */ +namespace simdjson { +namespace lsx { -#ifndef SIMDJSON_SERIALIZATION_INL_H -#define SIMDJSON_SERIALIZATION_INL_H +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("lsx", "LoongArch SX", internal::instruction_set::LSX) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; -/* skipped duplicate #include "simdjson/dom/base.h" */ -/* skipped duplicate #include "simdjson/dom/serialization.h" */ -/* skipped duplicate #include "simdjson/dom/parser.h" */ -/* skipped duplicate #include "simdjson/internal/tape_type.h" */ +} // namespace lsx +} // namespace simdjson -/* skipped duplicate #include "simdjson/dom/array-inl.h" */ -/* skipped duplicate #include "simdjson/dom/object-inl.h" */ -/* skipped duplicate #include "simdjson/internal/tape_ref-inl.h" */ +#endif // SIMDJSON_LSX_IMPLEMENTATION_H +/* end file simdjson/lsx/implementation.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(lasx) +/* including simdjson/lasx/implementation.h: #include "simdjson/lasx/implementation.h" */ +/* begin file simdjson/lasx/implementation.h */ +#ifndef SIMDJSON_LASX_IMPLEMENTATION_H +#define SIMDJSON_LASX_IMPLEMENTATION_H -#include +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace dom { -inline bool parser::print_json(std::ostream &os) const noexcept { - if (!valid) { return false; } - simdjson::internal::string_builder<> sb; - sb.append(doc.root()); - std::string_view answer = sb.str(); - os << answer; - return true; -} - -inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) { - simdjson::internal::string_builder<> sb; - sb.append(value); - return (out << sb.str()); -} -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#endif -inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value) { - simdjson::internal::string_builder<> sb; - sb.append(value); - return (out << sb.str()); -} -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#endif -inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value) { - simdjson::internal::string_builder<> sb; - sb.append(value); - return (out << sb.str()); -} -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#endif +namespace lasx { -} // namespace dom - -/*** - * Number utility functions - **/ -namespace { -/**@private - * Escape sequence like \b or \u0001 - * We expect that most compilers will use 8 bytes for this data structure. - **/ -struct escape_sequence { - uint8_t length; - const char string[7]; // technically, we only ever need 6 characters, we pad to 8 -}; -/**@private - * This converts a signed integer into a character sequence. - * The caller is responsible for providing enough memory (at least - * 20 characters.) - * Though various runtime libraries provide itoa functions, - * it is not part of the C++ standard. The C++17 standard - * adds the to_chars functions which would do as well, but - * we want to support C++11. - */ -static char *fast_itoa(char *output, int64_t value) noexcept { - // This is a standard implementation of itoa. - char buffer[20]; - uint64_t value_positive; - // In general, negating a signed integer is unsafe. - if(value < 0) { - *output++ = '-'; - // Doing value_positive = -value; while avoiding - // undefined behavior warnings. - // It assumes two complement's which is universal at this - // point in time. - std::memcpy(&value_positive, &value, sizeof(value)); - value_positive = (~value_positive) + 1; // this is a negation - } else { - value_positive = value; - } - // We work solely with value_positive. It *might* be easier - // for an optimizing compiler to deal with an unsigned variable - // as far as performance goes. - const char *const end_buffer = buffer + 20; - char *write_pointer = buffer + 19; - // A faster approach is possible if we expect large integers: - // unroll the loop (work in 100s, 1000s) and use some kind of - // memoization. - while(value_positive >= 10) { - *write_pointer-- = char('0' + (value_positive % 10)); - value_positive /= 10; - } - *write_pointer = char('0' + value_positive); - size_t len = end_buffer - write_pointer; - std::memcpy(output, write_pointer, len); - return output + len; -} -/**@private - * This converts an unsigned integer into a character sequence. - * The caller is responsible for providing enough memory (at least - * 19 characters.) - * Though various runtime libraries provide itoa functions, - * it is not part of the C++ standard. The C++17 standard - * adds the to_chars functions which would do as well, but - * we want to support C++11. +/** + * @private */ -static char *fast_itoa(char *output, uint64_t value) noexcept { - // This is a standard implementation of itoa. - char buffer[20]; - const char *const end_buffer = buffer + 20; - char *write_pointer = buffer + 19; - // A faster approach is possible if we expect large integers: - // unroll the loop (work in 100s, 1000s) and use some kind of - // memoization. - while(value >= 10) { - *write_pointer-- = char('0' + (value % 10)); - value /= 10; - }; - *write_pointer = char('0' + value); - size_t len = end_buffer - write_pointer; - std::memcpy(output, write_pointer, len); - return output + len; -} +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("lasx", "LoongArch ASX", internal::instruction_set::LASX) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; +} // namespace lasx +} // namespace simdjson -} // anonymous namespace -namespace internal { +#endif // SIMDJSON_LASX_IMPLEMENTATION_H +/* end file simdjson/lasx/implementation.h */ +#else +#error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION +#endif -/*** - * Minifier/formatter code. - **/ +/* undefining SIMDJSON_CONDITIONAL_INCLUDE */ +#undef SIMDJSON_CONDITIONAL_INCLUDE -template -simdjson_inline void base_formatter::number(uint64_t x) { - char number_buffer[24]; - char *newp = fast_itoa(number_buffer, x); - buffer.insert(buffer.end(), number_buffer, newp); -} +namespace simdjson { + /** + * Function which returns a pointer to an implementation matching the "builtin" implementation. + * The builtin implementation is the best statically linked simdjson implementation that can be used by the compiling + * program. If you compile with g++ -march=haswell, this will return the haswell implementation. + * It is handy to be able to check what builtin was used: builtin_implementation()->name(). + */ + const implementation * builtin_implementation(); +} // namespace simdjson -template -simdjson_inline void base_formatter::number(int64_t x) { - char number_buffer[24]; - char *newp = fast_itoa(number_buffer, x); - buffer.insert(buffer.end(), number_buffer, newp); -} +#endif // SIMDJSON_BUILTIN_IMPLEMENTATION_H +/* end file simdjson/builtin/implementation.h */ -template -simdjson_inline void base_formatter::number(double x) { - char number_buffer[24]; - // Currently, passing the nullptr to the second argument is - // safe because our implementation does not check the second - // argument. - char *newp = internal::to_chars(number_buffer, nullptr, x); - buffer.insert(buffer.end(), number_buffer, newp); -} +/* skipped duplicate #include "simdjson/generic/dependencies.h" */ -template -simdjson_inline void base_formatter::start_array() { one_char('['); } +/* defining SIMDJSON_CONDITIONAL_INCLUDE */ +#define SIMDJSON_CONDITIONAL_INCLUDE +#if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) +/* including simdjson/arm64.h: #include "simdjson/arm64.h" */ +/* begin file simdjson/arm64.h */ +#ifndef SIMDJSON_ARM64_H +#define SIMDJSON_ARM64_H -template -simdjson_inline void base_formatter::end_array() { one_char(']'); } +/* including simdjson/arm64/begin.h: #include "simdjson/arm64/begin.h" */ +/* begin file simdjson/arm64/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "arm64" */ +#define SIMDJSON_IMPLEMENTATION arm64 +/* including simdjson/arm64/base.h: #include "simdjson/arm64/base.h" */ +/* begin file simdjson/arm64/base.h */ +#ifndef SIMDJSON_ARM64_BASE_H +#define SIMDJSON_ARM64_BASE_H -template -simdjson_inline void base_formatter::start_object() { one_char('{'); } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -template -simdjson_inline void base_formatter::end_object() { one_char('}'); } +namespace simdjson { +/** + * Implementation for NEON (ARMv8). + */ +namespace arm64 { -template -simdjson_inline void base_formatter::comma() { one_char(','); } +class implementation; -template -simdjson_inline void base_formatter::true_atom() { - const char * s = "true"; - buffer.insert(buffer.end(), s, s + 4); -} +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace -template -simdjson_inline void base_formatter::false_atom() { - const char * s = "false"; - buffer.insert(buffer.end(), s, s + 5); -} +} // namespace arm64 +} // namespace simdjson -template -simdjson_inline void base_formatter::null_atom() { - const char * s = "null"; - buffer.insert(buffer.end(), s, s + 4); -} +#endif // SIMDJSON_ARM64_BASE_H +/* end file simdjson/arm64/base.h */ +/* including simdjson/arm64/intrinsics.h: #include "simdjson/arm64/intrinsics.h" */ +/* begin file simdjson/arm64/intrinsics.h */ +#ifndef SIMDJSON_ARM64_INTRINSICS_H +#define SIMDJSON_ARM64_INTRINSICS_H -template -simdjson_inline void base_formatter::one_char(char c) { buffer.push_back(c); } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -template -simdjson_inline void base_formatter::key(std::string_view unescaped) { - string(unescaped); - one_char(':'); -} +// This should be the correct header whether +// you use visual studio or other compilers. +#include -template -simdjson_inline void base_formatter::string(std::string_view unescaped) { - one_char('\"'); - size_t i = 0; - // Fast path for the case where we have no control character, no ", and no backslash. - // This should include most keys. - // - // We would like to use 'bool' but some compilers take offense to bitwise operation - // with bool types. - constexpr static char needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - for(;i + 8 <= unescaped.length(); i += 8) { - // Poor's man vectorization. This could get much faster if we used SIMD. - // - // It is not the case that replacing '|' with '||' would be neutral performance-wise. - if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])] - | needs_escaping[uint8_t(unescaped[i+2])] | needs_escaping[uint8_t(unescaped[i+3])] - | needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])] - | needs_escaping[uint8_t(unescaped[i+6])] | needs_escaping[uint8_t(unescaped[i+7])] - ) { break; } - } - for(;i < unescaped.length(); i++) { - if(needs_escaping[uint8_t(unescaped[i])]) { break; } - } - // The following is also possible and omits a 256-byte table, but it is slower: - // for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F) - // && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {} +static_assert(sizeof(uint8x16_t) <= simdjson::SIMDJSON_PADDING, "insufficient padding for arm64"); - // At least for long strings, the following should be fast. We could - // do better by integrating the checks and the insertion. - buffer.insert(buffer.end(), unescaped.data(), unescaped.data() + i); - // We caught a control character if we enter this loop (slow). - // Note that we are do not restart from the beginning, but rather we continue - // from the point where we encountered something that requires escaping. - for (; i < unescaped.length(); i++) { - switch (unescaped[i]) { - case '\"': - { - const char * s = "\\\""; - buffer.insert(buffer.end(), s, s + 2); - } - break; - case '\\': - { - const char * s = "\\\\"; - buffer.insert(buffer.end(), s, s + 2); - } - break; - default: - if (uint8_t(unescaped[i]) <= 0x1F) { - // If packed, this uses 8 * 32 bytes. - // Note that we expect most compilers to embed this code in the data - // section. - constexpr static escape_sequence escaped[32] = { - {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"}, - {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"}, {6, "\\u0007"}, - {2, "\\b"}, {2, "\\t"}, {2, "\\n"}, {6, "\\u000b"}, - {2, "\\f"}, {2, "\\r"}, {6, "\\u000e"}, {6, "\\u000f"}, - {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"}, - {6, "\\u0014"}, {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"}, - {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"}, - {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}}; - auto u = escaped[uint8_t(unescaped[i])]; - buffer.insert(buffer.end(), u.string, u.string + u.length); - } else { - one_char(unescaped[i]); - } - } // switch - } // for - one_char('\"'); -} +#endif // SIMDJSON_ARM64_INTRINSICS_H +/* end file simdjson/arm64/intrinsics.h */ +/* including simdjson/arm64/bitmanipulation.h: #include "simdjson/arm64/bitmanipulation.h" */ +/* begin file simdjson/arm64/bitmanipulation.h */ +#ifndef SIMDJSON_ARM64_BITMANIPULATION_H +#define SIMDJSON_ARM64_BITMANIPULATION_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -template -inline void base_formatter::clear() { - buffer.clear(); -} +namespace simdjson { +namespace arm64 { +namespace { -template -simdjson_inline std::string_view base_formatter::str() const { - return std::string_view(buffer.data(), buffer.size()); +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO } -simdjson_inline void mini_formatter::print_newline() { - return; +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); } -simdjson_inline void mini_formatter::print_indents(size_t depth) { - (void)depth; - return; +// We sometimes call leading_zeroes on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +// Applies only when SIMDJSON_PREFER_REVERSE_BITS is defined and true. +// (See below.) +SIMDJSON_NO_SANITIZE_UNDEFINED +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO } -simdjson_inline void mini_formatter::print_space() { - return; +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); } -simdjson_inline void pretty_formatter::print_newline() { - one_char('\n'); -} -simdjson_inline void pretty_formatter::print_indents(size_t depth) { - if(this->indent_step <= 0) { - return; - } - for(size_t i = 0; i < this->indent_step * depth; i++) { - one_char(' '); - } -} +#if defined(__GNUC__) // catches clang and gcc +/** + * ARM has a fast 64-bit "bit reversal function" that is handy. However, + * it is not generally available as an intrinsic function under Visual + * Studio (though this might be changing). Even under clang/gcc, we + * apparently need to invoke inline assembly. + */ +/* + * We use SIMDJSON_PREFER_REVERSE_BITS as a hint that algorithms that + * work well with bit reversal may use it. + */ +#define SIMDJSON_PREFER_REVERSE_BITS 1 -simdjson_inline void pretty_formatter::print_space() { - one_char(' '); +/* reverse the bits */ +simdjson_inline uint64_t reverse_bits(uint64_t input_num) { + uint64_t rev_bits; + __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); + return rev_bits; } -/*** - * String building code. +/** + * Flips bit at index 63 - lz. Thus if you have 'leading_zeroes' leading zeroes, + * then this will set to zero the leading bit. It is possible for leading_zeroes to be + * greating or equal to 63 in which case we trigger undefined behavior, but the output + * of such undefined behavior is never used. **/ +SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { + return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); +} -template -inline void string_builder::append(simdjson::dom::element value) { - // using tape_type = simdjson::internal::tape_type; - size_t depth = 0; - constexpr size_t MAX_DEPTH = 16; - bool is_object[MAX_DEPTH]; - is_object[0] = false; - bool after_value = false; - - internal::tape_ref iter(value.tape); - do { - // print commas after each value - if (after_value) { - format.comma(); - format.print_newline(); - } +#endif - format.print_indents(depth); +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} - // If we are in an object, print the next key and :, and skip to the next - // value. - if (is_object[depth]) { - format.key(iter.get_string_view()); - format.print_space(); - iter.json_index++; - } - switch (iter.tape_ref_type()) { +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson - // Arrays - case tape_type::START_ARRAY: { - // If we're too deep, we need to recurse to go deeper. - depth++; - if (simdjson_unlikely(depth >= MAX_DEPTH)) { - append(simdjson::dom::array(iter)); - iter.json_index = iter.matching_brace_index() - 1; // Jump to the ] - depth--; - break; - } - - // Output start [ - format.start_array(); - iter.json_index++; - - // Handle empty [] (we don't want to come back around and print commas) - if (iter.tape_ref_type() == tape_type::END_ARRAY) { - format.end_array(); - depth--; - break; - } - - is_object[depth] = false; - after_value = false; - format.print_newline(); - continue; - } - - // Objects - case tape_type::START_OBJECT: { - // If we're too deep, we need to recurse to go deeper. - depth++; - if (simdjson_unlikely(depth >= MAX_DEPTH)) { - append(simdjson::dom::object(iter)); - iter.json_index = iter.matching_brace_index() - 1; // Jump to the } - depth--; - break; - } - - // Output start { - format.start_object(); - iter.json_index++; - - // Handle empty {} (we don't want to come back around and print commas) - if (iter.tape_ref_type() == tape_type::END_OBJECT) { - format.end_object(); - depth--; - break; - } +#endif // SIMDJSON_ARM64_BITMANIPULATION_H +/* end file simdjson/arm64/bitmanipulation.h */ +/* including simdjson/arm64/bitmask.h: #include "simdjson/arm64/bitmask.h" */ +/* begin file simdjson/arm64/bitmask.h */ +#ifndef SIMDJSON_ARM64_BITMASK_H +#define SIMDJSON_ARM64_BITMASK_H - is_object[depth] = true; - after_value = false; - format.print_newline(); - continue; - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // Scalars - case tape_type::STRING: - format.string(iter.get_string_view()); - break; - case tape_type::INT64: - format.number(iter.next_tape_value()); - iter.json_index++; // numbers take up 2 spots, so we need to increment - // extra - break; - case tape_type::UINT64: - format.number(iter.next_tape_value()); - iter.json_index++; // numbers take up 2 spots, so we need to increment - // extra - break; - case tape_type::DOUBLE: - format.number(iter.next_tape_value()); - iter.json_index++; // numbers take up 2 spots, so we need to increment - // extra - break; - case tape_type::TRUE_VALUE: - format.true_atom(); - break; - case tape_type::FALSE_VALUE: - format.false_atom(); - break; - case tape_type::NULL_VALUE: - format.null_atom(); - break; +namespace simdjson { +namespace arm64 { +namespace { - // These are impossible - case tape_type::END_ARRAY: - case tape_type::END_OBJECT: - case tape_type::ROOT: - SIMDJSON_UNREACHABLE(); - } - iter.json_index++; - after_value = true; +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + ///////////// + // We could do this with PMULL, but it is apparently slow. + // + //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension + //return vmull_p64(-1ULL, bitmask); + //#else + // Analysis by @sebpop: + // When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out + // in between other vector code, so effectively the extra cycles of the sequence do not matter + // because the GPR units are idle otherwise and the critical path is on the FP side. + // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) + // and FP->GPR (2 cycles on N1 and 5 cycles on A72.) + /////////// + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} - // Handle multiple ends in a row - while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY || - iter.tape_ref_type() == tape_type::END_OBJECT)) { - format.print_newline(); - depth--; - format.print_indents(depth); - if (iter.tape_ref_type() == tape_type::END_ARRAY) { - format.end_array(); - } else { - format.end_object(); - } - iter.json_index++; - } +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson - // Stop when we're at depth 0 - } while (depth != 0); +#endif +/* end file simdjson/arm64/bitmask.h */ +/* including simdjson/arm64/numberparsing_defs.h: #include "simdjson/arm64/numberparsing_defs.h" */ +/* begin file simdjson/arm64/numberparsing_defs.h */ +#ifndef SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +#define SIMDJSON_ARM64_NUMBERPARSING_DEFS_H - format.print_newline(); -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -template -inline void string_builder::append(simdjson::dom::object value) { - format.start_object(); - auto pair = value.begin(); - auto end = value.end(); - if (pair != end) { - append(*pair); - for (++pair; pair != end; ++pair) { - format.comma(); - append(*pair); - } - } - format.end_object(); -} +#include -template -inline void string_builder::append(simdjson::dom::array value) { - format.start_array(); - auto iter = value.begin(); - auto end = value.end(); - if (iter != end) { - append(*iter); - for (++iter; iter != end; ++iter) { - format.comma(); - append(*iter); - } - } - format.end_array(); -} +#if SIMDJSON_REGULAR_VISUAL_STUDIO && SIMDJSON_IS_ARM64 +// __umulh requires intrin.h +#include +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO && SIMDJSON_IS_ARM64 -template -simdjson_inline void string_builder::append(simdjson::dom::key_value_pair kv) { - format.key(kv.key); - append(kv.value); -} +namespace simdjson { +namespace arm64 { +namespace numberparsing { -template -simdjson_inline void string_builder::clear() { - format.clear(); +// we don't have SSE, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); } -template -simdjson_inline std::string_view string_builder::str() const { - return format.str(); +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; } - -} // namespace internal +} // namespace numberparsing +} // namespace arm64 } // namespace simdjson +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 +#else +#define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif #endif -/* end file simdjson/dom/serialization-inl.h */ - -#endif // SIMDJSON_DOM_H -/* end file simdjson/dom.h */ -/* including simdjson/ondemand.h: #include "simdjson/ondemand.h" */ -/* begin file simdjson/ondemand.h */ -#ifndef SIMDJSON_ONDEMAND_H -#define SIMDJSON_ONDEMAND_H - -/* including simdjson/builtin/ondemand.h: #include "simdjson/builtin/ondemand.h" */ -/* begin file simdjson/builtin/ondemand.h */ -#ifndef SIMDJSON_BUILTIN_ONDEMAND_H -#define SIMDJSON_BUILTIN_ONDEMAND_H - -/* including simdjson/builtin.h: #include "simdjson/builtin.h" */ -/* begin file simdjson/builtin.h */ -#ifndef SIMDJSON_BUILTIN_H -#define SIMDJSON_BUILTIN_H - -/* including simdjson/builtin/base.h: #include "simdjson/builtin/base.h" */ -/* begin file simdjson/builtin/base.h */ -#ifndef SIMDJSON_BUILTIN_BASE_H -#define SIMDJSON_BUILTIN_BASE_H - -/* skipped duplicate #include "simdjson/base.h" */ -/* including simdjson/implementation_detection.h: #include "simdjson/implementation_detection.h" */ -/* begin file simdjson/implementation_detection.h */ -#ifndef SIMDJSON_IMPLEMENTATION_DETECTION_H -#define SIMDJSON_IMPLEMENTATION_DETECTION_H - -/* skipped duplicate #include "simdjson/base.h" */ -// 0 is reserved, because undefined SIMDJSON_IMPLEMENTATION equals 0 in preprocessor macros. -#define SIMDJSON_IMPLEMENTATION_ID_arm64 1 -#define SIMDJSON_IMPLEMENTATION_ID_fallback 2 -#define SIMDJSON_IMPLEMENTATION_ID_haswell 3 -#define SIMDJSON_IMPLEMENTATION_ID_icelake 4 -#define SIMDJSON_IMPLEMENTATION_ID_ppc64 5 -#define SIMDJSON_IMPLEMENTATION_ID_westmere 6 +#endif // SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +/* end file simdjson/arm64/numberparsing_defs.h */ +/* including simdjson/arm64/simd.h: #include "simdjson/arm64/simd.h" */ +/* begin file simdjson/arm64/simd.h */ +#ifndef SIMDJSON_ARM64_SIMD_H +#define SIMDJSON_ARM64_SIMD_H -#define SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) SIMDJSON_CAT(SIMDJSON_IMPLEMENTATION_ID_, IMPL) -#define SIMDJSON_IMPLEMENTATION_ID SIMDJSON_IMPLEMENTATION_ID_FOR(SIMDJSON_IMPLEMENTATION) +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#define SIMDJSON_IMPLEMENTATION_IS(IMPL) SIMDJSON_IMPLEMENTATION_ID == SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) +namespace simdjson { +namespace arm64 { +namespace { +namespace simd { -// -// First, figure out which implementations can be run. Doing it here makes it so we don't have to worry about the order -// in which we include them. -// +#if SIMDJSON_REGULAR_VISUAL_STUDIO +namespace { +// Start of private section with Visual Studio workaround -#ifndef SIMDJSON_IMPLEMENTATION_ARM64 -#define SIMDJSON_IMPLEMENTATION_ARM64 (SIMDJSON_IS_ARM64) -#endif -#define SIMDJSON_CAN_ALWAYS_RUN_ARM64 SIMDJSON_IMPLEMENTATION_ARM64 && SIMDJSON_IS_ARM64 -// Default Icelake to on if this is x86-64. Even if we're not compiled for it, it could be selected -// at runtime. -#ifndef SIMDJSON_IMPLEMENTATION_ICELAKE -#define SIMDJSON_IMPLEMENTATION_ICELAKE ((SIMDJSON_IS_X86_64) && (SIMDJSON_AVX512_ALLOWED) && (SIMDJSON_COMPILER_SUPPORTS_VBMI2)) +#ifndef simdjson_make_uint8x16_t +#define simdjson_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ + x13, x14, x15, x16) \ + ([=]() { \ + uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_u8(array); \ + }()) #endif - -#ifdef _MSC_VER -// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see -// https://github.com/simdjson/simdjson/issues/1247 -#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) -#else -#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#ifndef simdjson_make_int8x16_t +#define simdjson_make_int8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ + x13, x14, x15, x16) \ + ([=]() { \ + int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_s8(array); \ + }()) #endif -// Default Haswell to on if this is x86-64. Even if we're not compiled for it, it could be selected -// at runtime. -#ifndef SIMDJSON_IMPLEMENTATION_HASWELL -#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE -// if icelake is always available, never enable haswell. -#define SIMDJSON_IMPLEMENTATION_HASWELL 0 -#else -#define SIMDJSON_IMPLEMENTATION_HASWELL SIMDJSON_IS_X86_64 -#endif +#ifndef simdjson_make_uint8x8_t +#define simdjson_make_uint8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + uint8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1_u8(array); \ + }()) #endif -#ifdef _MSC_VER -// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see -// https://github.com/simdjson/simdjson/issues/1247 -#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__)) -#else -#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__)) +#ifndef simdjson_make_int8x8_t +#define simdjson_make_int8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + int8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1_s8(array); \ + }()) #endif - -// Default Westmere to on if this is x86-64. -#ifndef SIMDJSON_IMPLEMENTATION_WESTMERE -#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL -// if icelake or haswell are always available, never enable westmere. -#define SIMDJSON_IMPLEMENTATION_WESTMERE 0 -#else -#define SIMDJSON_IMPLEMENTATION_WESTMERE SIMDJSON_IS_X86_64 +#ifndef simdjson_make_uint16x8_t +#define simdjson_make_uint16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + uint16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1q_u16(array); \ + }()) #endif +#ifndef simdjson_make_int16x8_t +#define simdjson_make_int16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + int16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1q_s16(array); \ + }()) #endif -#define SIMDJSON_CAN_ALWAYS_RUN_WESTMERE (SIMDJSON_IMPLEMENTATION_WESTMERE && SIMDJSON_IS_X86_64 && __SSE4_2__ && __PCLMUL__) -#ifndef SIMDJSON_IMPLEMENTATION_PPC64 -#define SIMDJSON_IMPLEMENTATION_PPC64 (SIMDJSON_IS_PPC64 && SIMDJSON_IS_PPC64_VMX) -#endif -#define SIMDJSON_CAN_ALWAYS_RUN_PPC64 SIMDJSON_IMPLEMENTATION_PPC64 && SIMDJSON_IS_PPC64 && SIMDJSON_IS_PPC64_VMX +// End of private section with Visual Studio workaround +} // namespace +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO -// Default Fallback to on unless a builtin implementation has already been selected. -#ifndef SIMDJSON_IMPLEMENTATION_FALLBACK -#if SIMDJSON_CAN_ALWAYS_RUN_ARM64 || SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL || SIMDJSON_CAN_ALWAYS_RUN_WESTMERE || SIMDJSON_CAN_ALWAYS_RUN_PPC64 -// if anything at all except fallback can always run, then disable fallback. -#define SIMDJSON_IMPLEMENTATION_FALLBACK 0 -#else -#define SIMDJSON_IMPLEMENTATION_FALLBACK 1 -#endif -#endif -#define SIMDJSON_CAN_ALWAYS_RUN_FALLBACK SIMDJSON_IMPLEMENTATION_FALLBACK -// Determine the best builtin implementation -#ifndef SIMDJSON_BUILTIN_IMPLEMENTATION + template + struct simd8; -#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE -#define SIMDJSON_BUILTIN_IMPLEMENTATION icelake -#elif SIMDJSON_CAN_ALWAYS_RUN_HASWELL -#define SIMDJSON_BUILTIN_IMPLEMENTATION haswell -#elif SIMDJSON_CAN_ALWAYS_RUN_WESTMERE -#define SIMDJSON_BUILTIN_IMPLEMENTATION westmere -#elif SIMDJSON_CAN_ALWAYS_RUN_ARM64 -#define SIMDJSON_BUILTIN_IMPLEMENTATION arm64 -#elif SIMDJSON_CAN_ALWAYS_RUN_PPC64 -#define SIMDJSON_BUILTIN_IMPLEMENTATION ppc64 -#elif SIMDJSON_CAN_ALWAYS_RUN_FALLBACK -#define SIMDJSON_BUILTIN_IMPLEMENTATION fallback -#else -#error "All possible implementations (including fallback) have been disabled! simdjson will not run." -#endif + // + // Base class of simd8 and simd8, both of which use uint8x16_t internally. + // + template> + struct base_u8 { + uint8x16_t value; + static const int SIZE = sizeof(value); -#endif // SIMDJSON_BUILTIN_IMPLEMENTATION + // Conversion from/to SIMD register + simdjson_inline base_u8(const uint8x16_t _value) : value(_value) {} + simdjson_inline operator const uint8x16_t&() const { return this->value; } + simdjson_inline operator uint8x16_t&() { return this->value; } -#define SIMDJSON_BUILTIN_IMPLEMENTATION_ID SIMDJSON_IMPLEMENTATION_ID_FOR(SIMDJSON_BUILTIN_IMPLEMENTATION) -#define SIMDJSON_BUILTIN_IMPLEMENTATION_IS(IMPL) SIMDJSON_BUILTIN_IMPLEMENTATION_ID == SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) + // Bit operations + simdjson_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } + simdjson_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } + simdjson_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } + simdjson_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } -#endif // SIMDJSON_IMPLEMENTATION_DETECTION_H -/* end file simdjson/implementation_detection.h */ + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } -namespace simdjson { -#if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) - namespace arm64 {} -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback) - namespace fallback {} -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) - namespace haswell {} -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) - namespace icelake {} -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(ppc64) - namespace ppc64 {} -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) - namespace westmere {} -#else -#error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION -#endif + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_u8(prev_chunk, *this, 16 - N); + } + }; - /** - * Represents the best statically linked simdjson implementation that can be used by the compiling - * program. - * - * Detects what options the program is compiled against, and picks the minimum implementation that - * will work on any computer that can run the program. For example, if you compile with g++ - * -march=westmere, it will pick the westmere implementation. The haswell implementation will - * still be available, and can be selected at runtime, but the builtin implementation (and any - * code that uses it) will use westmere. - */ - namespace builtin = SIMDJSON_BUILTIN_IMPLEMENTATION; -} // namespace simdjson + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base_u8 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; -#endif // SIMDJSON_BUILTIN_BASE_H -/* end file simdjson/builtin/base.h */ -/* including simdjson/builtin/implementation.h: #include "simdjson/builtin/implementation.h" */ -/* begin file simdjson/builtin/implementation.h */ -#ifndef SIMDJSON_BUILTIN_IMPLEMENTATION_H -#define SIMDJSON_BUILTIN_IMPLEMENTATION_H + static simdjson_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } -/* skipped duplicate #include "simdjson/builtin/base.h" */ + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // False constructor + simdjson_inline simd8() : simd8(vdupq_n_u8(0)) {} + // Splat constructor + simdjson_inline simd8(bool _value) : simd8(splat(_value)) {} -/* including simdjson/generic/dependencies.h: #include "simdjson/generic/dependencies.h" */ -/* begin file simdjson/generic/dependencies.h */ -#ifdef SIMDJSON_CONDITIONAL_INCLUDE -#error simdjson/generic/dependencies.h must be included before defining SIMDJSON_CONDITIONAL_INCLUDE! + // We return uint32_t instead of uint16_t because that seems to be more efficient for most + // purposes (cutting it down to uint16_t costs performance in some compilers). + simdjson_inline uint32_t to_bitmask() const { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = simdjson_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; #endif + auto minput = *this & bit_mask; + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); + } + simdjson_inline bool any() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } + }; -#ifndef SIMDJSON_GENERIC_DEPENDENCIES_H -#define SIMDJSON_GENERIC_DEPENDENCIES_H + // Unsigned bytes + template<> + struct simd8: base_u8 { + static simdjson_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } + static simdjson_inline uint8x16_t zero() { return vdupq_n_u8(0); } + static simdjson_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } -// Internal headers needed for generics. -// All includes referencing simdjson headers *not* under simdjson/generic must be here! -// Otherwise, amalgamation will fail. -/* skipped duplicate #include "simdjson/base.h" */ -/* skipped duplicate #include "simdjson/implementation.h" */ -/* skipped duplicate #include "simdjson/implementation_detection.h" */ -/* including simdjson/internal/instruction_set.h: #include "simdjson/internal/instruction_set.h" */ -/* begin file simdjson/internal/instruction_set.h */ -/* From -https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h -Highly modified. - -Copyright (c) 2016- Facebook, Inc (Adam Paszke) -Copyright (c) 2014- Facebook, Inc (Soumith Chintala) -Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) -Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) -Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) -Copyright (c) 2011-2013 NYU (Clement Farabet) -Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, -Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute -(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, -Samy Bengio, Johnny Mariethoz) - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization +#if SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(simdjson_make_uint8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(uint8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories -America and IDIAP Research Institute nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. + // Store to array + simdjson_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -*/ + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } -#ifndef SIMDJSON_INTERNAL_INSTRUCTION_SET_H -#define SIMDJSON_INTERNAL_INSTRUCTION_SET_H + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } -namespace simdjson { -namespace internal { + // Order-specific operations + simdjson_inline uint8_t max_val() const { return vmaxvq_u8(*this); } + simdjson_inline uint8_t min_val() const { return vminvq_u8(*this); } + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_u8(*this, other); } + simdjson_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } + simdjson_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } + // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } + // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } -enum instruction_set { - DEFAULT = 0x0, - NEON = 0x1, - AVX2 = 0x4, - SSE42 = 0x8, - PCLMULQDQ = 0x10, - BMI1 = 0x20, - BMI2 = 0x40, - ALTIVEC = 0x80, - AVX512F = 0x100, - AVX512DQ = 0x200, - AVX512IFMA = 0x400, - AVX512PF = 0x800, - AVX512ER = 0x1000, - AVX512CD = 0x2000, - AVX512BW = 0x4000, - AVX512VL = 0x8000, - AVX512VBMI2 = 0x10000 -}; + // Bit-specific operations + simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } + simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } + template + simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } + template + simdjson_inline simd8 shl() const { return vshlq_n_u8(*this, N); } -} // namespace internal -} // namespace simdjson + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } -#endif // SIMDJSON_INTERNAL_INSTRUCTION_SET_H -/* end file simdjson/internal/instruction_set.h */ -/* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ -/* including simdjson/internal/jsoncharutils_tables.h: #include "simdjson/internal/jsoncharutils_tables.h" */ -/* begin file simdjson/internal/jsoncharutils_tables.h */ -#ifndef SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H -#define SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H -/* skipped duplicate #include "simdjson/base.h" */ + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint16_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; + uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); + // we increment by 0x08 the second half of the mask +#if SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x16_t inc = simdjson_make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + shufmask = vaddq_u8(shufmask, inc); + // this is the version "nearly pruned" + uint8x16_t pruned = vqtbl1q_u8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + uint8x16_t compactmask = vld1q_u8(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); + vst1q_u8(reinterpret_cast(output), answer); + } -#ifdef JSON_TEST_STRINGS -void found_string(const uint8_t *buf, const uint8_t *parsed_begin, - const uint8_t *parsed_end); -void found_bad_string(const uint8_t *buf); + // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a + // bitset) to output1, then those corresponding to a 0 in the high half to output2. + template + simdjson_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { + using internal::thintable_epi8; + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); + uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); + // we increment by 0x08 the second half of the mask +#if SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x8_t inc = simdjson_make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; #endif + compactmask2 = vadd_u8(compactmask2, inc); + // store each result (with the second store possibly overlapping the first) + vst1_u8((uint8_t*)output1, vqtbl1_u8(*this, compactmask1)); + vst1_u8((uint8_t*)output2, vqtbl1_u8(*this, compactmask2)); + } -namespace simdjson { -namespace internal { -// structural chars here are -// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL) -// we are also interested in the four whitespace characters -// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } -extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace_negated[256]; -extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace[256]; -extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886]; + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { + return vqtbl1q_u8(*this, simd8(original)); + } + }; -} // namespace internal -} // namespace simdjson + // Signed bytes + template<> + struct simd8 { + int8x16_t value; -#endif // SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H -/* end file simdjson/internal/jsoncharutils_tables.h */ -/* including simdjson/internal/numberparsing_tables.h: #include "simdjson/internal/numberparsing_tables.h" */ -/* begin file simdjson/internal/numberparsing_tables.h */ -#ifndef SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H -#define SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H + static simdjson_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } + static simdjson_inline simd8 zero() { return vdupq_n_s8(0); } + static simdjson_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } -/* skipped duplicate #include "simdjson/base.h" */ + // Conversion from/to SIMD register + simdjson_inline simd8(const int8x16_t _value) : value{_value} {} + simdjson_inline operator const int8x16_t&() const { return this->value; } + simdjson_inline operator int8x16_t&() { return this->value; } -namespace simdjson { -namespace internal { -/** - * The smallest non-zero float (binary64) is 2^-1074. - * We take as input numbers of the form w x 10^q where w < 2^64. - * We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. - * However, we have that - * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074. - * Thus it is possible for a number of the form w * 10^-342 where - * w is a 64-bit value to be a non-zero floating-point number. - ********* - * Any number of form w * 10^309 where w>= 1 is going to be - * infinite in binary64 so we never need to worry about powers - * of 5 greater than 308. - */ -constexpr int smallest_power = -342; -constexpr int largest_power = 308; + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization +#if SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(simdjson_make_int8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(int8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -/** - * Represents a 128-bit value. - * low: least significant 64 bits. - * high: most significant 64 bits. - */ -struct value128 { - uint64_t low; - uint64_t high; -}; + // Store to array + simdjson_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } + // Explicit conversion to/from unsigned + // + // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type. + // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14 + // and relatively ugly and hard to read. +#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} +#endif + simdjson_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } -// Precomputed powers of ten from 10^0 to 10^22. These -// can be represented exactly using the double type. -extern SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[]; + // Math + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } + simdjson_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } -/** - * When mapping numbers from decimal to binary, - * we go from w * 10^q to m * 2^p but we have - * 10^q = 5^q * 2^q, so effectively - * we are trying to match - * w * 2^q * 5^q to m * 2^p. Thus the powers of two - * are not a concern since they can be represented - * exactly using the binary notation, only the powers of five - * affect the binary significand. - */ + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_s8(prev_chunk, *this, 16 - N); + } + // Perform a lookup assuming no value is larger than 16 + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } -// The truncated powers of five from 5^-342 all the way to 5^308 -// The mantissa is truncated to 128 bits, and -// never rounded up. Uses about 10KB. -extern SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[]; -} // namespace internal -} // namespace simdjson + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { + return vqtbl1q_s8(*this, simd8(original)); + } + }; -#endif // SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H -/* end file simdjson/internal/numberparsing_tables.h */ -/* including simdjson/internal/simdprune_tables.h: #include "simdjson/internal/simdprune_tables.h" */ -/* begin file simdjson/internal/simdprune_tables.h */ -#ifndef SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H -#define SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; -/* skipped duplicate #include "simdjson/base.h" */ + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed -#include + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} -namespace simdjson { // table modified and copied from -namespace internal { // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } -extern SIMDJSON_DLLIMPORTEXPORT const unsigned char BitsSetTable256mul2[256]; + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } -extern SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272]; -// 256 * 8 bytes = 2kB, easily fits in cache. -extern SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256]; + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); + // compute the prefix sum of the popcounts of each byte + uint64_t offsets = popcounts * 0x0101010101010101; + this->chunks[0].compress_halves(uint16_t(mask), output, &output[popcounts & 0xFF]); + this->chunks[1].compress_halves(uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF], &output[(offsets >> 16) & 0xFF]); + this->chunks[2].compress_halves(uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF], &output[(offsets >> 32) & 0xFF]); + this->chunks[3].compress_halves(uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF], &output[(offsets >> 48) & 0xFF]); + return offsets >> 56; + } -} // namespace internal -} // namespace simdjson + simdjson_inline uint64_t to_bitmask() const { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = simdjson_make_uint8x16_t( + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + ); +#else + const uint8x16_t bit_mask = { + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + }; +#endif + // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one. + uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask); + uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); + } -#endif // SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H -/* end file simdjson/internal/simdprune_tables.h */ + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } -#endif // SIMDJSON_GENERIC_DEPENDENCIES_H -/* end file simdjson/generic/dependencies.h */ + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 -/* defining SIMDJSON_CONDITIONAL_INCLUDE */ -#define SIMDJSON_CONDITIONAL_INCLUDE +} // namespace simd +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson -#if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) -/* including simdjson/arm64/implementation.h: #include "simdjson/arm64/implementation.h" */ -/* begin file simdjson/arm64/implementation.h */ -#ifndef SIMDJSON_ARM64_IMPLEMENTATION_H -#define SIMDJSON_ARM64_IMPLEMENTATION_H +#endif // SIMDJSON_ARM64_SIMD_H +/* end file simdjson/arm64/simd.h */ +/* including simdjson/arm64/stringparsing_defs.h: #include "simdjson/arm64/stringparsing_defs.h" */ +/* begin file simdjson/arm64/stringparsing_defs.h */ +#ifndef SIMDJSON_ARM64_STRINGPARSING_DEFS_H +#define SIMDJSON_ARM64_STRINGPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { +namespace { -/** - * @private - */ -class implementation final : public simdjson::implementation { +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { public: - simdjson_inline implementation() : simdjson::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {} - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t capacity, - size_t max_length, - std::unique_ptr& dst - ) const noexcept final; - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; - simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; -}; + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} +} // unnamed namespace } // namespace arm64 } // namespace simdjson -#endif // SIMDJSON_ARM64_IMPLEMENTATION_H -/* end file simdjson/arm64/implementation.h */ -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback) -/* including simdjson/fallback/implementation.h: #include "simdjson/fallback/implementation.h" */ -/* begin file simdjson/fallback/implementation.h */ -#ifndef SIMDJSON_FALLBACK_IMPLEMENTATION_H -#define SIMDJSON_FALLBACK_IMPLEMENTATION_H +#endif // SIMDJSON_ARM64_STRINGPARSING_DEFS_H +/* end file simdjson/arm64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/arm64/begin.h */ +/* including simdjson/generic/amalgamated.h for arm64: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for arm64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for arm64: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for arm64 */ +#ifndef SIMDJSON_GENERIC_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace arm64 { + +struct open_container; +class dom_parser_implementation; /** - * @private + * The type of a JSON number */ -class implementation final : public simdjson::implementation { -public: - simdjson_inline implementation() : simdjson::implementation( - "fallback", - "Generic fallback implementation", - 0 - ) {} - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t capacity, - size_t max_length, - std::unique_ptr& dst - ) const noexcept final; - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; - simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word }; -} // namespace fallback +} // namespace arm64 } // namespace simdjson -#endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H -/* end file simdjson/fallback/implementation.h */ -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) -/* including simdjson/haswell/implementation.h: #include "simdjson/haswell/implementation.h" */ -/* begin file simdjson/haswell/implementation.h */ -#ifndef SIMDJSON_HASWELL_IMPLEMENTATION_H -#define SIMDJSON_HASWELL_IMPLEMENTATION_H +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for arm64 */ +/* including simdjson/generic/jsoncharutils.h for arm64: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for arm64 */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL namespace simdjson { -namespace haswell { +namespace arm64 { +namespace { +namespace jsoncharutils { -/** - * @private - */ -class implementation final : public simdjson::implementation { -public: - simdjson_inline implementation() : simdjson::implementation( - "haswell", - "Intel/AMD AVX2", - internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 - ) {} - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t capacity, - size_t max_length, - std::unique_ptr& dst - ) const noexcept final; - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; - simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; -}; +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} -} // namespace haswell +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace arm64 } // namespace simdjson -#endif // SIMDJSON_HASWELL_IMPLEMENTATION_H -/* end file simdjson/haswell/implementation.h */ -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) -/* including simdjson/icelake/implementation.h: #include "simdjson/icelake/implementation.h" */ -/* begin file simdjson/icelake/implementation.h */ -#ifndef SIMDJSON_ICELAKE_IMPLEMENTATION_H -#define SIMDJSON_ICELAKE_IMPLEMENTATION_H +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for arm64 */ +/* including simdjson/generic/atomparsing.h for arm64: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +#include + namespace simdjson { -namespace icelake { +namespace arm64 { +namespace { +/// @private +namespace atomparsing { -/** - * @private - */ -class implementation final : public simdjson::implementation { -public: - simdjson_inline implementation() : simdjson::implementation( - "icelake", - "Intel/AMD AVX512", - internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512F | internal::instruction_set::AVX512DQ | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 - ) {} - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t capacity, - size_t max_length, - std::unique_ptr& dst - ) const noexcept final; - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; - simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; -}; +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } -} // namespace icelake -} // namespace simdjson -#endif // SIMDJSON_ICELAKE_IMPLEMENTATION_H -/* end file simdjson/icelake/implementation.h */ -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(ppc64) -/* including simdjson/ppc64/implementation.h: #include "simdjson/ppc64/implementation.h" */ -/* begin file simdjson/ppc64/implementation.h */ -#ifndef SIMDJSON_PPC64_IMPLEMENTATION_H -#define SIMDJSON_PPC64_IMPLEMENTATION_H +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} -namespace simdjson { +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} -/** - * Implementation for ALTIVEC (PPC64). - */ -namespace ppc64 { +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} -/** - * @private - */ -class implementation final : public simdjson::implementation { -public: - simdjson_inline implementation() - : simdjson::implementation("ppc64", "PPC64 ALTIVEC", - internal::instruction_set::ALTIVEC) {} +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t capacity, size_t max_length, - std::unique_ptr &dst) - const noexcept final; - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, - uint8_t *dst, - size_t &dst_len) const noexcept final; - simdjson_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; -}; +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} -} // namespace ppc64 +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace arm64 } // namespace simdjson -#endif // SIMDJSON_PPC64_IMPLEMENTATION_H -/* end file simdjson/ppc64/implementation.h */ -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) -/* including simdjson/westmere/implementation.h: #include "simdjson/westmere/implementation.h" */ -/* begin file simdjson/westmere/implementation.h */ -#ifndef SIMDJSON_WESTMERE_IMPLEMENTATION_H -#define SIMDJSON_WESTMERE_IMPLEMENTATION_H +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for arm64 */ +/* including simdjson/generic/dom_parser_implementation.h for arm64: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for arm64 */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE namespace simdjson { -namespace westmere { +namespace arm64 { -/** - * @private - */ -class implementation final : public simdjson::implementation { +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { public: - simdjson_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t capacity, - size_t max_length, - std::unique_ptr& dst - ) const noexcept final; - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; - simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + }; -} // namespace westmere +} // namespace arm64 } // namespace simdjson -#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H -/* end file simdjson/westmere/implementation.h */ -#else -#error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION -#endif +namespace simdjson { +namespace arm64 { -/* undefining SIMDJSON_CONDITIONAL_INCLUDE */ -#undef SIMDJSON_CONDITIONAL_INCLUDE +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; -namespace simdjson { - /** - * Function which returns a pointer to an implementation matching the "builtin" implementation. - * The builtin implementation is the best statically linked simdjson implementation that can be used by the compiling - * program. If you compile with g++ -march=haswell, this will return the haswell implementation. - * It is handy to be able to check what builtin was used: builtin_implementation()->name(). - */ - const implementation * builtin_implementation(); -} // namespace simdjson +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; -#endif // SIMDJSON_BUILTIN_IMPLEMENTATION_H -/* end file simdjson/builtin/implementation.h */ + _capacity = capacity; + return SUCCESS; +} -/* skipped duplicate #include "simdjson/generic/dependencies.h" */ +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } -/* defining SIMDJSON_CONDITIONAL_INCLUDE */ -#define SIMDJSON_CONDITIONAL_INCLUDE + _max_depth = max_depth; + return SUCCESS; +} -#if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) -/* including simdjson/arm64.h: #include "simdjson/arm64.h" */ -/* begin file simdjson/arm64.h */ -#ifndef SIMDJSON_ARM64_H -#define SIMDJSON_ARM64_H +} // namespace arm64 +} // namespace simdjson -/* including simdjson/arm64/begin.h: #include "simdjson/arm64/begin.h" */ -/* begin file simdjson/arm64/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "arm64" */ -#define SIMDJSON_IMPLEMENTATION arm64 -/* including simdjson/arm64/base.h: #include "simdjson/arm64/base.h" */ -/* begin file simdjson/arm64/base.h */ -#ifndef SIMDJSON_ARM64_BASE_H -#define SIMDJSON_ARM64_BASE_H +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for arm64 */ +/* including simdjson/generic/implementation_simdjson_result_base.h for arm64: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for arm64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -/** - * Implementation for NEON (ARMv8). - */ namespace arm64 { -class implementation; - -namespace { -namespace simd { -template struct simd8; -template struct simd8x64; -} // namespace simd -} // unnamed namespace - -} // namespace arm64 -} // namespace simdjson - -#endif // SIMDJSON_ARM64_BASE_H -/* end file simdjson/arm64/base.h */ -/* including simdjson/arm64/intrinsics.h: #include "simdjson/arm64/intrinsics.h" */ -/* begin file simdjson/arm64/intrinsics.h */ -#ifndef SIMDJSON_ARM64_INTRINSICS_H -#define SIMDJSON_ARM64_INTRINSICS_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -// This should be the correct header whether -// you use visual studio or other compilers. -#include - -static_assert(sizeof(uint8x16_t) <= simdjson::SIMDJSON_PADDING, "insufficient padding for arm64"); - -#endif // SIMDJSON_ARM64_INTRINSICS_H -/* end file simdjson/arm64/intrinsics.h */ -/* including simdjson/arm64/bitmanipulation.h: #include "simdjson/arm64/bitmanipulation.h" */ -/* begin file simdjson/arm64/bitmanipulation.h */ -#ifndef SIMDJSON_ARM64_BITMANIPULATION_H -#define SIMDJSON_ARM64_BITMANIPULATION_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace arm64 { -namespace { +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -SIMDJSON_NO_SANITIZE_UNDEFINED -// This function can be used safely even if not all bytes have been -// initialized. -// See issue https://github.com/simdjson/simdjson/issues/1965 -SIMDJSON_NO_SANITIZE_MEMORY -simdjson_inline int trailing_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO -} + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; -/* result might be undefined when input_num is zero */ -simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return input_num & (input_num-1); -} + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; -/* result might be undefined when input_num is zero */ -simdjson_inline int leading_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif// SIMDJSON_REGULAR_VISUAL_STUDIO -} + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; -/* result might be undefined when input_num is zero */ -simdjson_inline int count_ones(uint64_t input_num) { - return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); -} + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; -#if defined(__GNUC__) // catches clang and gcc -/** - * ARM has a fast 64-bit "bit reversal function" that is handy. However, - * it is not generally available as an intrinsic function under Visual - * Studio (though this might be changing). Even under clang/gcc, we - * apparently need to invoke inline assembly. - */ -/* - * We use SIMDJSON_PREFER_REVERSE_BITS as a hint that algorithms that - * work well with bit reversal may use it. - */ -#define SIMDJSON_PREFER_REVERSE_BITS 1 + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; -/* reverse the bits */ -simdjson_inline uint64_t reverse_bits(uint64_t input_num) { - uint64_t rev_bits; - __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); - return rev_bits; -} + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; -/** - * Flips bit at index 63 - lz. Thus if you have 'leading_zeroes' leading zeroes, - * then this will set to zero the leading bit. It is possible for leading_zeroes to be - * greating or equal to 63 in which case we trigger undefined behavior, but the output - * of such undefined behavior is never used. - **/ -SIMDJSON_NO_SANITIZE_UNDEFINED -simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { - return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); -} +#if SIMDJSON_EXCEPTIONS -#endif + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); -simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - *result = value1 + value2; - return *result < value1; -#else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); -#endif -} + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); -#endif // SIMDJSON_ARM64_BITMANIPULATION_H -/* end file simdjson/arm64/bitmanipulation.h */ -/* including simdjson/arm64/bitmask.h: #include "simdjson/arm64/bitmask.h" */ -/* begin file simdjson/arm64/bitmask.h */ -#ifndef SIMDJSON_ARM64_BITMASK_H -#define SIMDJSON_ARM64_BITMASK_H + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -namespace simdjson { -namespace arm64 { -namespace { +#endif // SIMDJSON_EXCEPTIONS -// -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. -// -// For example, prefix_xor(00100100) == 00011100 -// -simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { - ///////////// - // We could do this with PMULL, but it is apparently slow. - // - //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension - //return vmull_p64(-1ULL, bitmask); - //#else - // Analysis by @sebpop: - // When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out - // in between other vector code, so effectively the extra cycles of the sequence do not matter - // because the GPR units are idle otherwise and the critical path is on the FP side. - // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) - // and FP->GPR (2 cycles on N1 and 5 cycles on A72.) - /////////// - bitmask ^= bitmask << 1; - bitmask ^= bitmask << 2; - bitmask ^= bitmask << 4; - bitmask ^= bitmask << 8; - bitmask ^= bitmask << 16; - bitmask ^= bitmask << 32; - return bitmask; -} + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base -} // unnamed namespace } // namespace arm64 } // namespace simdjson -#endif -/* end file simdjson/arm64/bitmask.h */ -/* including simdjson/arm64/numberparsing_defs.h: #include "simdjson/arm64/numberparsing_defs.h" */ -/* begin file simdjson/arm64/numberparsing_defs.h */ -#ifndef SIMDJSON_ARM64_NUMBERPARSING_DEFS_H -#define SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for arm64 */ +/* including simdjson/generic/numberparsing.h for arm64: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for arm64 */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include +#include #include -#if _M_ARM64 -// __umulh requires intrin.h -#include -#endif // _M_ARM64 - namespace simdjson { namespace arm64 { namespace numberparsing { -// we don't have SSE, so let us use a scalar function -// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ -/** @private */ -static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - uint64_t val; - std::memcpy(&val, chars, sizeof(uint64_t)); - val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; - val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; - return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); -} - -simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { - internal::value128 answer; -#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) #else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) #endif - return answer; -} - -} // namespace numberparsing -} // namespace arm64 -} // namespace simdjson - -#define SIMDJSON_SWAR_NUMBER_PARSING 1 - -#endif // SIMDJSON_ARM64_NUMBERPARSING_DEFS_H -/* end file simdjson/arm64/numberparsing_defs.h */ -/* including simdjson/arm64/simd.h: #include "simdjson/arm64/simd.h" */ -/* begin file simdjson/arm64/simd.h */ -#ifndef SIMDJSON_ARM64_SIMD_H -#define SIMDJSON_ARM64_SIMD_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace arm64 { -namespace { -namespace simd { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO namespace { -// Start of private section with Visual Studio workaround - -#ifndef simdjson_make_uint8x16_t -#define simdjson_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ - x13, x14, x15, x16) \ - ([=]() { \ - uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ - x9, x10, x11, x12, x13, x14, x15, x16}; \ - return vld1q_u8(array); \ - }()) -#endif -#ifndef simdjson_make_int8x16_t -#define simdjson_make_int8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ - x13, x14, x15, x16) \ - ([=]() { \ - int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ - x9, x10, x11, x12, x13, x14, x15, x16}; \ - return vld1q_s8(array); \ - }()) -#endif +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} -#ifndef simdjson_make_uint8x8_t -#define simdjson_make_uint8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ - ([=]() { \ - uint8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ - return vld1_u8(array); \ - }()) -#endif -#ifndef simdjson_make_int8x8_t -#define simdjson_make_int8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ - ([=]() { \ - int8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ - return vld1_s8(array); \ - }()) -#endif -#ifndef simdjson_make_uint16x8_t -#define simdjson_make_uint16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ - ([=]() { \ - uint16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ - return vld1q_u16(array); \ - }()) +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." #endif -#ifndef simdjson_make_int16x8_t -#define simdjson_make_int16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ - ([=]() { \ - int16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ - return vld1q_s16(array); \ - }()) +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) #endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html -// End of private section with Visual Studio workaround -} // namespace -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO + // The fast path has now failed, so we are failing back on the slower path. + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } - template - struct simd8; + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. // - // Base class of simd8 and simd8, both of which use uint8x16_t internally. + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. // - template> - struct base_u8 { - uint8x16_t value; - static const int SIZE = sizeof(value); + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximately equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; - // Conversion from/to SIMD register - simdjson_inline base_u8(const uint8x16_t _value) : value(_value) {} - simdjson_inline operator const uint8x16_t&() const { return this->value; } - simdjson_inline operator uint8x16_t&() { return this->value; } - // Bit operations - simdjson_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } - simdjson_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } - simdjson_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } - simdjson_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } - simdjson_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; - friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } - template - simdjson_inline simd8 prev(const simd8 prev_chunk) const { - return vextq_u8(prev_chunk, *this, 16 - N); - } - }; + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. - // SIMD byte mask type (returned by things like eq and gt) - template<> - struct simd8: base_u8 { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); - static simdjson_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } - simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} - // False constructor - simdjson_inline simd8() : simd8(vdupq_n_u8(0)) {} - // Splat constructor - simdjson_inline simd8(bool _value) : simd8(splat(_value)) {} + mantissa += mantissa & 1; + mantissa >>= 1; - // We return uint32_t instead of uint16_t because that seems to be more efficient for most - // purposes (cutting it down to uint16_t costs performance in some compilers). - simdjson_inline uint32_t to_bitmask() const { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - const uint8x16_t bit_mask = simdjson_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); -#else - const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; -#endif - auto minput = *this & bit_mask; - uint8x16_t tmp = vpaddq_u8(minput, minput); - tmp = vpaddq_u8(tmp, tmp); - tmp = vpaddq_u8(tmp, tmp); - return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); - } - simdjson_inline bool any() const { return vmaxvq_u8(*this) != 0; } - }; + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} - // Unsigned bytes - template<> - struct simd8: base_u8 { - static simdjson_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } - static simdjson_inline uint8x16_t zero() { return vdupq_n_u8(0); } - static simdjson_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. - simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} - // Zero constructor - simdjson_inline simd8() : simd8(zero()) {} - // Array constructor - simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} - // Splat constructor - simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Member-by-member initialization -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - simdjson_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) : simd8(simdjson_make_uint8x16_t( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} -#else - simdjson_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) : simd8(uint8x16_t{ - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - }) {} -#endif + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. - // Store to array - simdjson_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} - // Saturated math - simdjson_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } - simdjson_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} - // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } - simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } - simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} - // Order-specific operations - simdjson_inline uint8_t max_val() const { return vmaxvq_u8(*this); } - simdjson_inline uint8_t min_val() const { return vminvq_u8(*this); } - simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return vminq_u8(*this, other); } - simdjson_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } - simdjson_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } - simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, other); } - simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } - // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. - simdjson_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } - // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. - simdjson_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} - // Bit-specific operations - simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } - simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } - simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } - template - simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } - template - simdjson_inline simd8 shl() const { return vshlq_n_u8(*this, N); } +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) - template - simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return lookup_table.apply_lookup_16_to(*this); - } +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). - // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes - // get written. - // Design consideration: it seems like a function with the - // signature simd8 compress(uint16_t mask) would be - // sensible, but the AVX ISA makes this kind of approach difficult. - template - simdjson_inline void compress(uint16_t mask, L * output) const { - using internal::thintable_epi8; - using internal::BitsSetTable256mul2; - using internal::pshufb_combine_table; - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; - uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); - // we increment by 0x08 the second half of the mask -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - uint8x16_t inc = simdjson_make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); -#else - uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; -#endif - shufmask = vaddq_u8(shufmask, inc); - // this is the version "nearly pruned" - uint8x16_t pruned = vqtbl1q_u8(*this, shufmask); - // we still need to put the two halves together. - // we compute the popcount of the first half: - int pop1 = BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - uint8x16_t compactmask = vld1q_u8(reinterpret_cast(pshufb_combine_table + pop1 * 8)); - uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); - vst1q_u8(reinterpret_cast(output), answer); - } + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. - // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a - // bitset) to output1, then those corresponding to a 0 in the high half to output2. - template - simdjson_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { - using internal::thintable_epi8; - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); - uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); - // we increment by 0x08 the second half of the mask -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - uint8x8_t inc = simdjson_make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); -#else - uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; -#endif - compactmask2 = vadd_u8(compactmask2, inc); - // store each result (with the second store possibly overlapping the first) - vst1_u8((uint8_t*)output1, vqtbl1_u8(*this, compactmask1)); - vst1_u8((uint8_t*)output2, vqtbl1_u8(*this, compactmask2)); - } + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. - template - simdjson_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, replace15 - )); - } + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. - template - simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { - return vqtbl1q_u8(*this, simd8(original)); - } - }; + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} - // Signed bytes - template<> - struct simd8 { - int8x16_t value; +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} - static simdjson_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } - static simdjson_inline simd8 zero() { return vdupq_n_s8(0); } - static simdjson_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} - // Conversion from/to SIMD register - simdjson_inline simd8(const int8x16_t _value) : value{_value} {} - simdjson_inline operator const int8x16_t&() const { return this->value; } - simdjson_inline operator int8x16_t&() { return this->value; } +} // unnamed namespace - // Zero constructor - simdjson_inline simd8() : simd8(zero()) {} - // Splat constructor - simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} - // Member-by-member initialization -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - simdjson_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) : simd8(simdjson_make_int8x16_t( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} -#else - simdjson_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) : simd8(int8x16_t{ - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - }) {} -#endif - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } - - // Store to array - simdjson_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} - // Explicit conversion to/from unsigned +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 // - // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type. - // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14 - // and relatively ugly and hard to read. -#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO - simdjson_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} -#endif - simdjson_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } - - // Math - simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } - simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } - simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } - - // Order-sensitive comparisons - simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } - simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(*this, other); } - simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } - simdjson_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } - - template - simdjson_inline simd8 prev(const simd8 prev_chunk) const { - return vextq_s8(prev_chunk, *this, 16 - N); + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} - // Perform a lookup assuming no value is larger than 16 - template - simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return lookup_table.apply_lookup_16_to(*this); - } - template - simdjson_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, replace15 - )); - } +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); - template - simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { - return vqtbl1q_s8(*this, simd8(original)); - } - }; +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING - template - struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} - simd8x64(const simd8x64& o) = delete; // no copy allowed - simd8x64& operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); - simdjson_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr+sizeof(simd8)*0); - this->chunks[1].store(ptr+sizeof(simd8)*1); - this->chunks[2].store(ptr+sizeof(simd8)*2); - this->chunks[3].store(ptr+sizeof(simd8)*3); - } + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } - simdjson_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); - } + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } - simdjson_inline uint64_t compress(uint64_t mask, T * output) const { - uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); - // compute the prefix sum of the popcounts of each byte - uint64_t offsets = popcounts * 0x0101010101010101; - this->chunks[0].compress_halves(uint16_t(mask), output, &output[popcounts & 0xFF]); - this->chunks[1].compress_halves(uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF], &output[(offsets >> 16) & 0xFF]); - this->chunks[2].compress_halves(uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF], &output[(offsets >> 32) & 0xFF]); - this->chunks[3].compress_halves(uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF], &output[(offsets >> 48) & 0xFF]); - return offsets >> 56; - } + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } - simdjson_inline uint64_t to_bitmask() const { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - const uint8x16_t bit_mask = simdjson_make_uint8x16_t( - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 - ); -#else - const uint8x16_t bit_mask = { - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 - }; -#endif - // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one. - uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask); - uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask); - sum0 = vpaddq_u8(sum0, sum1); - sum0 = vpaddq_u8(sum0, sum0); - return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); - } + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} - simdjson_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] == mask, - this->chunks[1] == mask, - this->chunks[2] == mask, - this->chunks[3] == mask - ).to_bitmask(); - } +// Inlineable functions +namespace { - simdjson_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] <= mask, - this->chunks[1] <= mask, - this->chunks[2] <= mask, - this->chunks[3] <= mask - ).to_bitmask(); - } - }; // struct simd8x64 +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); -} // namespace simd -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; -#endif // SIMDJSON_ARM64_SIMD_H -/* end file simdjson/arm64/simd.h */ -/* including simdjson/arm64/stringparsing_defs.h: #include "simdjson/arm64/stringparsing_defs.h" */ -/* begin file simdjson/arm64/stringparsing_defs.h */ -#ifndef SIMDJSON_ARM64_STRINGPARSING_DEFS_H -#define SIMDJSON_ARM64_STRINGPARSING_DEFS_H +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/simd.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } -namespace simdjson { -namespace arm64 { -namespace { + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } -using namespace simd; + return i; +} -// Holds backslashes and quotes locations. -struct backslash_and_quote { -public: - static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_inline bool has_backslash() { return bs_bits != 0; } - simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } - uint32_t bs_bits; - uint32_t quote_bits; -}; // struct backslash_and_quote + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } -simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // this can read up to 31 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); - simd8 v0(src); - simd8 v1(src + sizeof(v0)); - v0.store(dst); - v1.store(dst + sizeof(v0)); + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } - // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; therefore, we - // smash them together into a 64-byte mask and get the bitmask from there. - uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); - return { - uint32_t(bs_and_quote), // bs_bits - uint32_t(bs_and_quote >> 32) // quote_bits - }; + return i; } -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson - -#endif // SIMDJSON_ARM64_STRINGPARSING_DEFS_H -/* end file simdjson/arm64/stringparsing_defs.h */ - -#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 -/* end file simdjson/arm64/begin.h */ -/* including simdjson/generic/amalgamated.h for arm64: #include "simdjson/generic/amalgamated.h" */ -/* begin file simdjson/generic/amalgamated.h for arm64 */ -#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) -#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! -#endif +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } -/* including simdjson/generic/base.h for arm64: #include "simdjson/generic/base.h" */ -/* begin file simdjson/generic/base.h for arm64 */ -#ifndef SIMDJSON_GENERIC_BASE_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ -/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ -/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ -/* amalgamation skipped (editor-only): #else */ -/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ -/* amalgamation skipped (editor-only): #endif */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } -namespace simdjson { -namespace arm64 { + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } -struct open_container; -class dom_parser_implementation; + return i; +} -/** - * The type of a JSON number - */ -enum class number_type { - floating_point_number=1, /// a binary64 number - signed_integer, /// a signed integer that fits in a 64-bit word using two's complement - unsigned_integer /// a positive integer larger or equal to 1<<63 -}; +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); -} // namespace arm64 -} // namespace simdjson + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } -#endif // SIMDJSON_GENERIC_BASE_H -/* end file simdjson/generic/base.h for arm64 */ -/* including simdjson/generic/jsoncharutils.h for arm64: #include "simdjson/generic/jsoncharutils.h" */ -/* begin file simdjson/generic/jsoncharutils.h for arm64 */ -#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); -namespace simdjson { -namespace arm64 { -namespace { -namespace jsoncharutils { + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } -// return non-zero if not a structural or whitespace char -// zero otherwise -simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace_negated[c]; + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; } -simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace[c]; -} +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; -// returns a value with the high 16 bits set if not valid -// otherwise returns the conversion of the 4 hex digits at src into the bottom -// 16 bits of the 32-bit return register -// -// see -// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ -static inline uint32_t hex_to_u32_nocheck( - const uint8_t *src) { // strictly speaking, static inline is a C-ism - uint32_t v1 = internal::digit_to_val32[630 + src[0]]; - uint32_t v2 = internal::digit_to_val32[420 + src[1]]; - uint32_t v3 = internal::digit_to_val32[210 + src[2]]; - uint32_t v4 = internal::digit_to_val32[0 + src[3]]; - return v1 | v2 | v3 | v4; -} + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } -// given a code point cp, writes to c -// the utf-8 code, outputting the length in -// bytes, if the length is zero, the code point -// is invalid -// -// This can possibly be made faster using pdep -// and clz and table lookups, but JSON documents -// have few escaped code points, and the following -// function looks cheap. -// -// Note: we assume that surrogates are treated separately -// -simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { - if (cp <= 0x7F) { - c[0] = uint8_t(cp); - return 1; // ascii - } - if (cp <= 0x7FF) { - c[0] = uint8_t((cp >> 6) + 192); - c[1] = uint8_t((cp & 63) + 128); - return 2; // universal plane - // Surrogates are treated elsewhere... - //} //else if (0xd800 <= cp && cp <= 0xdfff) { - // return 0; // surrogates // could put assert here - } else if (cp <= 0xFFFF) { - c[0] = uint8_t((cp >> 12) + 224); - c[1] = uint8_t(((cp >> 6) & 63) + 128); - c[2] = uint8_t((cp & 63) + 128); - return 3; - } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed - c[0] = uint8_t((cp >> 18) + 240); - c[1] = uint8_t(((cp >> 12) & 63) + 128); - c[2] = uint8_t(((cp >> 6) & 63) + 128); - c[3] = uint8_t((cp & 63) + 128); - return 4; - } - // will return 0 when the code point was too large. - return 0; // bad r + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; } -#if SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; -} -#endif +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); -} // namespace jsoncharutils -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } -#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H -/* end file simdjson/generic/jsoncharutils.h for arm64 */ -/* including simdjson/generic/atomparsing.h for arm64: #include "simdjson/generic/atomparsing.h" */ -/* begin file simdjson/generic/atomparsing.h for arm64 */ -#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } -#include + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; -namespace simdjson { -namespace arm64 { -namespace { -/// @private -namespace atomparsing { + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } -// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. -// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot -// be certain that the character pointer will be properly aligned. -// You might think that using memcpy makes this function expensive, but you'd be wrong. -// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); -// to the compile-time constant 1936482662. -simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + exponent += exp_neg ? 0-exp : exp; + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } -// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. -// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. -simdjson_warn_unused -simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { - uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) - static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); - std::memcpy(&srcval, src, sizeof(uint32_t)); - return srcval ^ string_to_uint32(atom); -} + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src) { - return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; } -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_true_atom(src); } - else if (len == 4) { return !str4ncmp(src, "true"); } - else { return false; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); } -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src) { - return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; } -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { - if (len > 5) { return is_valid_false_atom(src); } - else if (len == 5) { return !str4ncmp(src+1, "alse"); } - else { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). + static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; } -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src) { - return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_null_atom(src); } - else if (len == 4) { return !str4ncmp(src, "null"); } - else { return false; } -} + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } -} // namespace atomparsing -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); -#endif // SIMDJSON_GENERIC_ATOMPARSING_H -/* end file simdjson/generic/atomparsing.h for arm64 */ -/* including simdjson/generic/dom_parser_implementation.h for arm64: #include "simdjson/generic/dom_parser_implementation.h" */ -/* begin file simdjson/generic/dom_parser_implementation.h for arm64 */ -#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; -namespace simdjson { -namespace arm64 { + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } -// expectation: sizeof(open_container) = 64/8. -struct open_container { - uint32_t tape_index; // where, on the tape, does the scope ([,{) begins - uint32_t count; // how many elements in the scope -}; // struct open_container + exponent += exp_neg ? 0-exp : exp; + } -static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - /** Tape location of each open { or [ */ - std::unique_ptr open_containers{}; - /** Whether each open container is a [ or { */ - std::unique_ptr is_array{}; - /** Buffer passed to stage 1 */ - const uint8_t *buf{}; - /** Length passed to stage 1 */ - size_t len{0}; - /** Document passed to stage 2 */ - dom::document *doc{}; + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - inline dom_parser_implementation() noexcept; - inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; - inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} - simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; - simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; - simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; - simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; - inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; - inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; -private: - simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; -}; + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } -} // namespace arm64 -} // namespace simdjson + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); -namespace simdjson { -namespace arm64 { + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } -inline dom_parser_implementation::dom_parser_implementation() noexcept = default; -inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; -inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; -// Leaving these here so they can be inlined if so desired -inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } - // Stage 1 index output - size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; - structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); - if (!structural_indexes) { _capacity = 0; return MEMALLOC; } - structural_indexes[0] = 0; - n_structural_indexes = 0; + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - _capacity = capacity; - return SUCCESS; + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; } -inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - // Stage 2 stacks - open_containers.reset(new (std::nothrow) open_container[max_depth]); - is_array.reset(new (std::nothrow) bool[max_depth]); - if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING - _max_depth = max_depth; - return SUCCESS; +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; } } // namespace arm64 } // namespace simdjson -#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H -/* end file simdjson/generic/dom_parser_implementation.h for arm64 */ -/* including simdjson/generic/implementation_simdjson_result_base.h for arm64: #include "simdjson/generic/implementation_simdjson_result_base.h" */ -/* begin file simdjson/generic/implementation_simdjson_result_base.h for arm64 */ -#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for arm64 */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for arm64: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace arm64 { -// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair -// so we can avoid inlining errors -// TODO reconcile these! -/** - * The result of a simdjson operation that could fail. - * - * Gives the option of reading error codes, or throwing an exception by casting to the desired result. - * - * This is a base class for implementations that want to add functions to the result type for - * chaining. - * - * Override like: - * - * struct simdjson_result : public internal::implementation_simdjson_result_base { - * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} - * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} - * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} - * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} - * // Your extra methods here - * } - * - * Then any method returning simdjson_result will be chainable with your methods. - */ +// +// internal::implementation_simdjson_result_base inline implementation +// + template -struct implementation_simdjson_result_base { +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} - /** - * Create a new empty result with error = UNINITIALIZED. - */ - simdjson_inline implementation_simdjson_result_base() noexcept = default; +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} - /** - * Create a new error result. - */ - simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} - /** - * Create a new successful result. - */ - simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; +#if SIMDJSON_EXCEPTIONS - /** - * Create a new result with both things (use if you don't want to branch when creating the result). - */ - simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} - /** - * Move the value and the error to the provided variables. - * - * @param value The variable to assign the value to. May not be set if there is an error. - * @param error The variable to assign the error to. Set to SUCCESS if there is no error. - */ - simdjson_inline void tie(T &value, error_code &error) && noexcept; +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} - /** - * Move the value to the provided variable. - * - * @param value The variable to assign the value to. May not be set if there is an error. - */ - simdjson_inline error_code get(T &value) && noexcept; +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} - /** - * The error. - */ - simdjson_inline error_code error() const noexcept; +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} -#if SIMDJSON_EXCEPTIONS +#endif // SIMDJSON_EXCEPTIONS - /** - * Get the result value. - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T& value() & noexcept(false); +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T&& value() && noexcept(false); +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T&& take_value() && noexcept(false); +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} - /** - * Cast to the value (will throw on error). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline operator T&&() && noexcept(false); +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} +} // namespace arm64 +} // namespace simdjson -#endif // SIMDJSON_EXCEPTIONS +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for arm64 */ +/* end file simdjson/generic/amalgamated.h for arm64 */ +/* including simdjson/arm64/end.h: #include "simdjson/arm64/end.h" */ +/* begin file simdjson/arm64/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline const T& value_unsafe() const& noexcept; - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline T& value_unsafe() & noexcept; - /** - * Take the result value (move it). This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline T&& value_unsafe() && noexcept; -protected: - /** users should never directly access first and second. **/ - T first{}; /** Users should never directly access 'first'. **/ - error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ -}; // struct implementation_simdjson_result_base +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "arm64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/arm64/end.h */ -} // namespace arm64 -} // namespace simdjson +#endif // SIMDJSON_ARM64_H +/* end file simdjson/arm64.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback) +/* including simdjson/fallback.h: #include "simdjson/fallback.h" */ +/* begin file simdjson/fallback.h */ +#ifndef SIMDJSON_FALLBACK_H +#define SIMDJSON_FALLBACK_H -#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H -/* end file simdjson/generic/implementation_simdjson_result_base.h for arm64 */ -/* including simdjson/generic/numberparsing.h for arm64: #include "simdjson/generic/numberparsing.h" */ -/* begin file simdjson/generic/numberparsing.h for arm64 */ -#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H +/* including simdjson/fallback/begin.h: #include "simdjson/fallback/begin.h" */ +/* begin file simdjson/fallback/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "fallback" */ +#define SIMDJSON_IMPLEMENTATION fallback +/* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */ +/* begin file simdjson/fallback/base.h */ +#ifndef SIMDJSON_FALLBACK_BASE_H +#define SIMDJSON_FALLBACK_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include -#include -#include +namespace simdjson { +/** + * Fallback implementation (runs on any machine). + */ +namespace fallback { + +class implementation; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BASE_H +/* end file simdjson/fallback/base.h */ +/* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */ +/* begin file simdjson/fallback/bitmanipulation.h */ +#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H +#define SIMDJSON_FALLBACK_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { -namespace numberparsing { +namespace fallback { +namespace { -#ifdef JSON_TEST_NUMBERS -#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) -#else -#define INVALID_NUMBER(SRC) (NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) +static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { + unsigned long x0 = (unsigned long)x, top, bottom; + _BitScanForward(&top, (unsigned long)(x >> 32)); + _BitScanForward(&bottom, x0); + *ret = x0 ? bottom : 32 + top; + return x != 0; +} +static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { + unsigned long x1 = (unsigned long)(x >> 32), top, bottom; + _BitScanReverse(&top, x1); + _BitScanReverse(&bottom, (unsigned long)x); + *ret = x1 ? top + 32 : bottom; + return x != 0; +} #endif +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef _MSC_VER + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// _MSC_VER +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H +/* end file simdjson/fallback/bitmanipulation.h */ +/* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */ +/* begin file simdjson/fallback/stringparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { namespace { -// Convert a mantissa, an exponent and a sign bit into an ieee64 double. -// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). -// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. -simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { - double d; - mantissa &= ~(1ULL << 52); - mantissa |= real_exponent << 52; - mantissa |= ((static_cast(negative)) << 63); - std::memcpy(&d, &mantissa, sizeof(d)); - return d; +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return c == '"'; } + simdjson_inline bool has_backslash() { return c == '\\'; } + simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } + simdjson_inline int backslash_index() { return c == '\\' ? 0 : 1; } + + uint8_t c; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // store to dest unconditionally - we can overwrite the bits we don't like later + dst[0] = src[0]; + return { src[0] }; } -// Attempts to compute i * 10^(power) exactly; and if "negative" is -// true, negate the result. -// This function will only work in some cases, when it does not work, success is -// set to false. This should work *most of the time* (like 99% of the time). -// We assume that power is in the [smallest_power, -// largest_power] interval: the caller is responsible for this check. -simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { - // we start with a fast path - // It was described in - // Clinger WD. How to read floating point numbers accurately. - // ACM SIGPLAN Notices. 1990 -#ifndef FLT_EVAL_METHOD -#error "FLT_EVAL_METHOD should be defined, please include cfloat." -#endif -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - // We cannot be certain that x/y is rounded to nearest. - if (0 <= power && power <= 22 && i <= 9007199254740991) -#else - if (-22 <= power && power <= 22 && i <= 9007199254740991) -#endif - { - // convert the integer into a double. This is lossless since - // 0 <= i <= 2^53 - 1. - d = double(i); - // - // The general idea is as follows. - // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then - // 1) Both s and p can be represented exactly as 64-bit floating-point - // values - // (binary64). - // 2) Because s and p can be represented exactly as floating-point values, - // then s * p - // and s / p will produce correctly rounded values. - // - if (power < 0) { - d = d / simdjson::internal::power_of_ten[-power]; - } else { - d = d * simdjson::internal::power_of_ten[power]; - } - if (negative) { - d = -d; - } - return true; - } - // When 22 < power && power < 22 + 16, we could - // hope for another, secondary fast path. It was - // described by David M. Gay in "Correctly rounded - // binary-decimal and decimal-binary conversions." (1990) - // If you need to compute i * 10^(22 + x) for x < 16, - // first compute i * 10^x, if you know that result is exact - // (e.g., when i * 10^x < 2^53), - // then you can still proceed and do (i * 10^x) * 10^22. - // Is this worth your time? - // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) - // for this second fast path to work. - // If you you have 22 < power *and* power < 22 + 16, and then you - // optimistically compute "i * 10^(x-22)", there is still a chance that you - // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of - // this optimization maybe less common than we would like. Source: - // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ - // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html - - // The fast path has now failed, so we are failing back on the slower path. - - // In the slow path, we need to adjust i so that it is > 1<<63 which is always - // possible, except if i == 0, so we handle i == 0 separately. - if(i == 0) { - d = negative ? -0.0 : 0.0; - return true; - } - - - // The exponent is 1024 + 63 + power - // + floor(log(5**power)/log(2)). - // The 1024 comes from the ieee64 standard. - // The 63 comes from the fact that we use a 64-bit word. - // - // Computing floor(log(5**power)/log(2)) could be - // slow. Instead we use a fast function. - // - // For power in (-400,350), we have that - // (((152170 + 65536) * power ) >> 16); - // is equal to - // floor(log(5**power)/log(2)) + power when power >= 0 - // and it is equal to - // ceil(log(5**-power)/log(2)) + power when power < 0 - // - // The 65536 is (1<<16) and corresponds to - // (65536 * power) >> 16 ---> power - // - // ((152170 * power ) >> 16) is equal to - // floor(log(5**power)/log(2)) - // - // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). - // The 1<<16 value is a power of two; we could use a - // larger power of 2 if we wanted to. - // - int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; - +} // unnamed namespace +} // namespace fallback +} // namespace simdjson - // We want the most significant bit of i to be 1. Shift if needed. - int lz = leading_zeroes(i); - i <<= lz; +#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +/* end file simdjson/fallback/stringparsing_defs.h */ +/* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */ +/* begin file simdjson/fallback/numberparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // We are going to need to do some 64-bit arithmetic to get a precise product. - // We use a table lookup approach. - // It is safe because - // power >= smallest_power - // and power <= largest_power - // We recover the mantissa of the power, it has a leading 1. It is always - // rounded down. - // - // We want the most significant 64 bits of the product. We know - // this will be non-zero because the most significant bit of i is - // 1. - const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); - // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); - // Both i and power_of_five_128[index] have their most significant bit set to 1 which - // implies that the either the most or the second most significant bit of the product - // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason about the product: there - // is 0 or 1 leading zero in the product. +#include - // Unless the least significant 9 bits of the high (64-bit) part of the full - // product are all 1s, then we know that the most significant 55 bits are - // exact and no further work is needed. Having 55 bits is necessary because - // we need 53 bits for the mantissa but we have to have one rounding bit and - // we can waste a bit if the most significant bit of the product is zero. - if((firstproduct.high & 0x1FF) == 0x1FF) { - // We want to compute i * 5^q, but only care about the top 55 bits at most. - // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing - // the full computation is wasteful. So we do what is called a "truncated - // multiplication". - // We take the most significant 64-bits, and we put them in - // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q - // to the desired approximation using one multiplication. Sometimes it does not suffice. - // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and - // then we get a better approximation to i * 5^q. - // - // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat - // more complicated. - // - // There is an extra layer of complexity in that we need more than 55 bits of - // accuracy in the round-to-even scenario. - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); - firstproduct.low += secondproduct.high; - if(secondproduct.high > firstproduct.low) { firstproduct.high++; } - // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without - // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product - // is sufficiently accurate, and more computation is not needed. - } - uint64_t lower = firstproduct.low; - uint64_t upper = firstproduct.high; - // The final mantissa should be 53 bits with a leading 1. - // We shift it so that it occupies 54 bits with a leading 1. - /////// - uint64_t upperbit = upper >> 63; - uint64_t mantissa = upper >> (upperbit + 9); - lz += int(1 ^ upperbit); +#ifdef JSON_TEST_NUMBERS // for unit testing +void found_invalid_number(const uint8_t *buf); +void found_integer(int64_t result, const uint8_t *buf); +void found_unsigned_integer(uint64_t result, const uint8_t *buf); +void found_float(double result, const uint8_t *buf); +#endif - // Here we have mantissa < (1<<54). - int64_t real_exponent = exponent - lz; - if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 - if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = negative ? -0.0 : 0.0; - return true; - } - // next line is safe because -real_exponent + 1 < 0 - mantissa >>= -real_exponent + 1; - // Thankfully, we can't have both "round-to-even" and subnormals because - // "round-to-even" only occurs for powers close to 0. - mantissa += (mantissa & 1); // round up - mantissa >>= 1; - // There is a weird scenario where we don't have a subnormal but just. - // Suppose we start with 2.2250738585072013e-308, we end up - // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal - // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round - // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer - // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. - real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; - d = to_double(mantissa, real_exponent, negative); - return true; - } - // We have to round to even. The "to even" part - // is only a problem when we are right in between two floats - // which we guard against. - // If we have lots of trailing zeros, we may fall right between two - // floating-point values. - // - // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] - // times a power of two. That is, it is right between a number with binary significand - // m and another number with binary significand m+1; and it must be the case - // that it cannot be represented by a float itself. - // - // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. - // Recall that 10^q = 5^q * 2^q. - // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that - // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. - // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so - // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have - // 2^{53} x 5^{-q} < 2^{64}. - // Hence we have 5^{-q} < 2^{11}$ or q>= -4. - // - // We require lower <= 1 and not lower == 0 because we could not prove that - // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. - if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { - if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { - mantissa &= ~1; // flip it so that we do not round up - } - } +namespace simdjson { +namespace fallback { +namespace numberparsing { - mantissa += mantissa & 1; - mantissa >>= 1; +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) { + uint64_t val; + memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} - // Here we have mantissa < (1<<53), unless there was an overflow - if (mantissa >= (1ULL << 53)) { - ////////// - // This will happen when parsing values such as 7.2057594037927933e+16 - //////// - mantissa = (1ULL << 52); - real_exponent++; - } - mantissa &= ~(1ULL << 52); - // we have to check that real_exponent is in range, otherwise we bail out - if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinite value!!! We could actually throw an error here if we could. - return false; - } - d = to_double(mantissa, real_exponent, negative); - return true; +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + return parse_eight_digits_unrolled(reinterpret_cast(chars)); } -// We call a fallback floating-point parser that might be slow. Note -// it will accept JSON numbers, but the JSON spec. is more restrictive so -// before you call parse_float_fallback, you need to have validated the input -// string with the JSON grammar. -// It will return an error (false) if the parsed number is infinite. -// The string parsing itself always succeeds. We know that there is at least -// one digit. -static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); - // We do not accept infinite values. +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; } -static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); - // We do not accept infinite values. +} // namespace numberparsing +} // namespace fallback +} // namespace simdjson - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 +#else +#define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif +#endif -// check quickly whether the next 8 chars are made of digits -// at a glance, it looks better than Mula's -// http://0x80.pl/articles/swar-digits-validate.html -simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { - uint64_t val; - // this can read up to 7 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); - std::memcpy(&val, chars, 8); - // a branchy method might be faster: - // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) - // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == - // 0x3030303030303030); - return (((val & 0xF0F0F0F0F0F0F0F0) | - (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == - 0x3333333333333333); -} +#endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +/* end file simdjson/fallback/numberparsing_defs.h */ +/* end file simdjson/fallback/begin.h */ +/* including simdjson/generic/amalgamated.h for fallback: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for fallback */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif -template -SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_inline bool parse_digit(const uint8_t c, I &i) { - const uint8_t digit = static_cast(c - '0'); - if (digit > 9) { - return false; - } - // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication - i = 10 * i + digit; // might overflow, we will handle the overflow later - return true; -} +/* including simdjson/generic/base.h for fallback: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for fallback */ +#ifndef SIMDJSON_GENERIC_BASE_H -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. - const uint8_t *const first_after_period = p; +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#ifdef SIMDJSON_SWAR_NUMBER_PARSING -#if SIMDJSON_SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif // SIMDJSON_SWAR_NUMBER_PARSING -#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING - // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) - if (parse_digit(*p, i)) { ++p; } - while (parse_digit(*p, i)) { p++; } - exponent = first_after_period - p; - // Decimal without digits (123.) is illegal - if (exponent == 0) { - return INVALID_NUMBER(src); - } - return SUCCESS; -} +namespace simdjson { +namespace fallback { -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { - // Exp Sign: -123.456e[-]78 - bool neg_exp = ('-' == *p); - if (neg_exp || '+' == *p) { p++; } // Skip + as well +struct open_container; +class dom_parser_implementation; - // Exponent: -123.456e-[78] - auto start_exp = p; - int64_t exp_number = 0; - while (parse_digit(*p, exp_number)) { ++p; } - // It is possible for parse_digit to overflow. - // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. - // Thus we *must* check for possible overflow before we negate exp_number. +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; - // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into - // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may - // not oblige and may, in fact, generate two distinct paths in any case. It might be - // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off - // instructions for a simdjson_likely branch, an unconclusive gain. +} // namespace fallback +} // namespace simdjson - // If there were no digits, it's an error. - if (simdjson_unlikely(p == start_exp)) { - return INVALID_NUMBER(src); - } - // We have a valid positive exponent in exp_number at this point, except that - // it may have overflowed. +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for fallback */ +/* including simdjson/generic/jsoncharutils.h for fallback: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for fallback */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H - // If there were more than 18 digits, we may have overflowed the integer. We have to do - // something!!!! - if (simdjson_unlikely(p > start_exp+18)) { - // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow - while (*start_exp == '0') { start_exp++; } - // 19 digits could overflow int64_t and is kind of absurd anyway. We don't - // support exponents smaller than -999,999,999,999,999,999 and bigger - // than 999,999,999,999,999,999. - // We can truncate. - // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before - // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could - // truncate at 324. - // Note that there is no reason to fail per se at this point in time. - // E.g., 0e999999999999999999999 is a fine number. - if (p > start_exp+18) { exp_number = 999999999999999999; } - } - // At this point, we know that exp_number is a sane, positive, signed integer. - // It is <= 999,999,999,999,999,999. As long as 'exponent' is in - // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' - // is bounded in magnitude by the size of the JSON input, we are fine in this universe. - // To sum it up: the next line should never overflow. - exponent += (neg_exp ? -exp_number : exp_number); - return SUCCESS; -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { ++start; } - // we over-decrement by one when there is a '.' - return digit_count - size_t(start - start_digits); -} +namespace simdjson { +namespace fallback { +namespace { +namespace jsoncharutils { -} // unnamed namespace +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} -/** @private */ -static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { - if (parse_float_fallback(src, answer)) { - return SUCCESS; - } - return INVALID_NUMBER(src); +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; } -/** @private */ -template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon in practice. - // - // 9999999999999999999 < 2**64 so we can accommodate 19 digits. - // If we have a decimal separator, then digit_count - 1 is the number of digits, but we - // may not have a decimal separator! - if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { - // Ok, chances are good that we had an overflow! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer - // reference to it, it would force it to be stored in memory, preventing the compiler from - // picking it apart and putting into registers. i.e. if we pass it as reference, - // it gets slow. - double d; - error_code error = slow_float_parsing(src, &d); - writer.append_double(d); - return error; - } - // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other - // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 - // To future reader: we'd love if someone found a better way, or at least could explain this result! - if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { - // - // Important: smallest_power is such that it leads to a zero value. - // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero - // so something x 10^-343 goes to zero, but not so with something x 10^-342. - static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); - // - if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero - WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); - return SUCCESS; - } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. - return INVALID_NUMBER(src); - } +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii } - double d; - if (!compute_float_64(exponent, i, negative, d)) { - // we are almost never going to get here. - if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; } - WRITE_DOUBLE(d, src, writer); - return SUCCESS; + // will return 0 when the code point was too large. + return 0; // bad r } -// for performance analysis, it is sometimes useful to skip parsing -#ifdef SIMDJSON_SKIPNUMBERPARSING - -template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { - writer.append_s64(0); // always write zero - return SUCCESS; // always succeeds +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; } +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } -#else +} // namespace jsoncharutils +} // unnamed namespace +} // namespace fallback +} // namespace simdjson -// parse the number at src -// define JSON_TEST_NUMBERS for unit testing -// -// It is assumed that the number is followed by a structural ({,},],[) character -// or a white space character. If that is not the case (e.g., when the JSON -// document is made of a single number), then it is necessary to copy the -// content and append a space before calling this function. -// -// Our objective is accurate parsing (ULP of 0) at high speed. -template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for fallback */ +/* including simdjson/generic/atomparsing.h for fallback: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for fallback */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +#include - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } +namespace simdjson { +namespace fallback { +namespace { +/// @private +namespace atomparsing { - // - // Handle floats if there is a . or e (or both) - // - int64_t exponent = 0; - bool is_float = false; - if ('.' == *p) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); - digit_count = int(p - start_digits); // used later to guard against overflows - } - if (('e' == *p) || ('E' == *p)) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_exponent(src, p, exponent) ); - } - if (is_float) { - const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); - SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); - if (dirty_end) { return INVALID_NUMBER(src); } - return SUCCESS; - } +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } - // The longest negative 64-bit number is 19 digits. - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - size_t longest_digit_count = negative ? 19 : 20; - if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } - WRITE_INTEGER(~i+1, src, writer); - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } - } - // Write unsigned if it doesn't fit in a signed integer. - if (i > uint64_t(INT64_MAX)) { - WRITE_UNSIGNED(i, src, writer); - } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); - } - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); } -// Inlineable functions -namespace { - -// This table can be used to characterize the final character of an integer -// string. For JSON structural character and allowable white space characters, -// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise -// we return NUMBER_ERROR. -// Optimization note: we could easily reduce the size of the table by half (to 128) -// at the cost of an extra branch. -// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): -static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); - -const uint8_t integer_string_finisher[256] = { - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR}; +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} - return i; +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} -// Parse any number from 0 to 18,446,744,073,709,551,615 -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } +} // namespace atomparsing +} // unnamed namespace +} // namespace fallback +} // namespace simdjson - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for fallback */ +/* including simdjson/generic/dom_parser_implementation.h for fallback: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for fallback */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - return i; -} +namespace simdjson { +namespace fallback { -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { - const uint8_t *p = src + 1; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - // Note: we use src[1] and not src[0] because src[0] is the quote character in this - // instance. - if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; - return i; -} + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +}; - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} +} // namespace fallback +} // namespace simdjson -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { - // - // Check for minus sign - // - if(src == src_end) { return NUMBER_ERROR; } - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); +namespace simdjson { +namespace fallback { - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += uint8_t(negative) + 1; + _capacity = capacity; + return SUCCESS; +} - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = src; - uint64_t i = 0; - while (parse_digit(*src, i)) { src++; } +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(src - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*src)) { - // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(*src != '"') { return NUMBER_ERROR; } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; + _max_depth = max_depth; + return SUCCESS; } -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += uint8_t(negative); +} // namespace fallback +} // namespace simdjson - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for fallback */ +/* including simdjson/generic/implementation_simdjson_result_base.h for fallback: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for fallback */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = p-start_digits > 19; - } - } else { - overflow = p-src > 19; - } +namespace simdjson { +namespace fallback { - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; - exponent += exp_neg ? 0-exp : exp; - } + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), &d)) { - return NUMBER_ERROR; - } - return d; -} + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; -simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { - return (*src == '-'); -} + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; -simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += uint8_t(negative); - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } - return false; -} + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; -simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += uint8_t(negative); - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { - // We have an integer. - // If the number is negative and valid, it must be a signed integer. - if(negative) { return number_type::signed_integer; } - // We want values larger or equal to 9223372036854775808 to be unsigned - // integers, and the other values to be signed integers. - int digit_count = int(p - src); - if(digit_count >= 19) { - const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); - if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { - return number_type::unsigned_integer; - } - } - return number_type::signed_integer; - } - // Hopefully, we have 'e' or 'E' or '.'. - return number_type::floating_point_number; -} +#if SIMDJSON_EXCEPTIONS -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { - if(src == src_end) { return NUMBER_ERROR; } - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += uint8_t(negative); + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - if(p == src_end) { return NUMBER_ERROR; } - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while ((p != src_end) && parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely((p != src_end) && (*p == '.'))) { - p++; - const uint8_t *start_decimal_digits = p; - if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); - // - // Parse the exponent - // - if ((p != src_end) && (*p == 'e' || *p == 'E')) { - p++; - if(p == src_end) { return NUMBER_ERROR; } - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while ((p != src_end) && parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } +#endif // SIMDJSON_EXCEPTIONS - exponent += exp_neg ? 0-exp : exp; - } + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base - if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } +} // namespace fallback +} // namespace simdjson - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for fallback */ +/* including simdjson/generic/numberparsing.h for fallback: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for fallback */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { - return NUMBER_ERROR; - } - return d; -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += uint8_t(negative) + 1; +#include +#include +#include - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } +namespace simdjson { +namespace fallback { +namespace numberparsing { - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = p-start_digits > 19; +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; } - } else { - overflow = p-src > 19; + if (negative) { + d = -d; + } + return true; } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + // The fast path has now failed, so we are failing back on the slower path. - exponent += exp_neg ? 0-exp : exp; + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; } - if (*p != '"') { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. // - // Assemble (or slow-parse) the float + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), &d)) { - return NUMBER_ERROR; - } - return d; -} - -} // unnamed namespace -#endif // SIMDJSON_SKIPNUMBERPARSING - -} // namespace numberparsing + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximately equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; -inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { - switch (type) { - case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; - case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; - case number_type::floating_point_number: out << "floating-point number (binary64)"; break; - default: SIMDJSON_UNREACHABLE(); - } - return out; -} -} // namespace arm64 -} // namespace simdjson + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; -#endif // SIMDJSON_GENERIC_NUMBERPARSING_H -/* end file simdjson/generic/numberparsing.h for arm64 */ -/* including simdjson/generic/implementation_simdjson_result_base-inl.h for arm64: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for arm64 */ -#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); -namespace simdjson { -namespace arm64 { + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } -// -// internal::implementation_simdjson_result_base inline implementation -// + mantissa += mantissa & 1; + mantissa >>= 1; -template -simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { - error = this->second; - if (!error) { - value = std::forward>(*this).first; + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; } + d = to_double(mantissa, real_exponent, negative); + return true; } -template -simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { - error_code error; - std::forward>(*this).tie(value, error); - return error; -} +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. -template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { - return this->second; + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } -#if SIMDJSON_EXCEPTIONS +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. -template -simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return this->first; + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } -template -simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { - return std::forward>(*this).take_value(); +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); } -template -simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(this->first); +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; } -template -simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { - return std::forward>(*this).take_value(); +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; } -#endif // SIMDJSON_EXCEPTIONS +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; -template -simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { - return this->first; +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; } -template -simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { - return this->first; +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; } -template -simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { - return std::forward(this->first); +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; } -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept - : first{std::forward(value)}, second{error} {} -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept - : implementation_simdjson_result_base(T{}, error) {} -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept - : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} -} // namespace arm64 -} // namespace simdjson +} // unnamed namespace -#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H -/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for arm64 */ -/* end file simdjson/generic/amalgamated.h for arm64 */ -/* including simdjson/arm64/end.h: #include "simdjson/arm64/end.h" */ -/* begin file simdjson/arm64/end.h */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} -#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT -/* undefining SIMDJSON_IMPLEMENTATION from "arm64" */ -#undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/arm64/end.h */ - -#endif // SIMDJSON_ARM64_H -/* end file simdjson/arm64.h */ -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback) -/* including simdjson/fallback.h: #include "simdjson/fallback.h" */ -/* begin file simdjson/fallback.h */ -#ifndef SIMDJSON_FALLBACK_H -#define SIMDJSON_FALLBACK_H +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} -/* including simdjson/fallback/begin.h: #include "simdjson/fallback/begin.h" */ -/* begin file simdjson/fallback/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "fallback" */ -#define SIMDJSON_IMPLEMENTATION fallback -/* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */ -/* begin file simdjson/fallback/base.h */ -#ifndef SIMDJSON_FALLBACK_BASE_H -#define SIMDJSON_FALLBACK_BASE_H +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING -namespace simdjson { -/** - * Fallback implementation (runs on any machine). - */ -namespace fallback { +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} -class implementation; +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else -} // namespace fallback -} // namespace simdjson +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); -#endif // SIMDJSON_FALLBACK_BASE_H -/* end file simdjson/fallback/base.h */ -/* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */ -/* begin file simdjson/fallback/bitmanipulation.h */ -#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H -#define SIMDJSON_FALLBACK_BITMANIPULATION_H + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } -namespace simdjson { -namespace fallback { -namespace { + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } -#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) -static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { - unsigned long x0 = (unsigned long)x, top, bottom; - _BitScanForward(&top, (unsigned long)(x >> 32)); - _BitScanForward(&bottom, x0); - *ret = x0 ? bottom : 32 + top; - return x != 0; -} -static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { - unsigned long x1 = (unsigned long)(x >> 32), top, bottom; - _BitScanReverse(&top, x1); - _BitScanReverse(&bottom, (unsigned long)x); - *ret = x1 ? top + 32 : bottom; - return x != 0; -} -#endif + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } -/* result might be undefined when input_num is zero */ -simdjson_inline int leading_zeroes(uint64_t input_num) { -#ifdef _MSC_VER - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif// _MSC_VER + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; } -} // unnamed namespace -} // namespace fallback -} // namespace simdjson - -#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H -/* end file simdjson/fallback/bitmanipulation.h */ -/* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */ -/* begin file simdjson/fallback/stringparsing_defs.h */ -#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H -#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +// Inlineable functions +namespace { -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); -namespace simdjson { -namespace fallback { -namespace { +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; -// Holds backslashes and quotes locations. -struct backslash_and_quote { -public: - static constexpr uint32_t BYTES_PROCESSED = 1; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } - simdjson_inline bool has_quote_first() { return c == '"'; } - simdjson_inline bool has_backslash() { return c == '\\'; } - simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } - simdjson_inline int backslash_index() { return c == '\\' ? 0 : 1; } + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - uint8_t c; -}; // struct backslash_and_quote + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } -simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // store to dest unconditionally - we can overwrite the bits we don't like later - dst[0] = src[0]; - return { src[0] }; + return i; } -} // unnamed namespace -} // namespace fallback -} // namespace simdjson -#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H -/* end file simdjson/fallback/stringparsing_defs.h */ -/* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */ -/* begin file simdjson/fallback/numberparsing_defs.h */ -#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H -#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } -#include + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } -#ifdef JSON_TEST_NUMBERS // for unit testing -void found_invalid_number(const uint8_t *buf); -void found_integer(int64_t result, const uint8_t *buf); -void found_unsigned_integer(uint64_t result, const uint8_t *buf); -void found_float(double result, const uint8_t *buf); -#endif + return i; +} -namespace simdjson { -namespace fallback { -namespace numberparsing { +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } -// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ -/** @private */ -static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) { - uint64_t val; - memcpy(&val, chars, sizeof(uint64_t)); - val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; - val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; - return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); -} + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } -/** @private */ -static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - return parse_eight_digits_unrolled(reinterpret_cast(chars)); -} + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } -#if SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; + return i; } -#endif -/** @private */ -simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { - internal::value128 answer; -#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); -#endif - return answer; +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; } -} // namespace numberparsing -} // namespace fallback -} // namespace simdjson +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); -#define SIMDJSON_SWAR_NUMBER_PARSING 1 + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } -#endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H -/* end file simdjson/fallback/numberparsing_defs.h */ -/* end file simdjson/fallback/begin.h */ -/* including simdjson/generic/amalgamated.h for fallback: #include "simdjson/generic/amalgamated.h" */ -/* begin file simdjson/generic/amalgamated.h for fallback */ -#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) -#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! -#endif + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} -/* including simdjson/generic/base.h for fallback: #include "simdjson/generic/base.h" */ -/* begin file simdjson/generic/base.h for fallback */ -#ifndef SIMDJSON_GENERIC_BASE_H +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ -/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ -/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ -/* amalgamation skipped (editor-only): #else */ -/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ -/* amalgamation skipped (editor-only): #endif */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } -namespace simdjson { -namespace fallback { + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} -struct open_container; -class dom_parser_implementation; +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); -/** - * The type of a JSON number - */ -enum class number_type { - floating_point_number=1, /// a binary64 number - signed_integer, /// a signed integer that fits in a 64-bit word using two's complement - unsigned_integer /// a positive integer larger or equal to 1<<63 -}; + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } -} // namespace fallback -} // namespace simdjson + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); -#endif // SIMDJSON_GENERIC_BASE_H -/* end file simdjson/generic/base.h for fallback */ -/* including simdjson/generic/jsoncharutils.h for fallback: #include "simdjson/generic/jsoncharutils.h" */ -/* begin file simdjson/generic/jsoncharutils.h for fallback */ -#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; -namespace simdjson { -namespace fallback { -namespace { -namespace jsoncharutils { + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } -// return non-zero if not a structural or whitespace char -// zero otherwise -simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace_negated[c]; -} + exponent += exp_neg ? 0-exp : exp; + } -simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace[c]; -} + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } -// returns a value with the high 16 bits set if not valid -// otherwise returns the conversion of the 4 hex digits at src into the bottom -// 16 bits of the 32-bit return register -// -// see -// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ -static inline uint32_t hex_to_u32_nocheck( - const uint8_t *src) { // strictly speaking, static inline is a C-ism - uint32_t v1 = internal::digit_to_val32[630 + src[0]]; - uint32_t v2 = internal::digit_to_val32[420 + src[1]]; - uint32_t v3 = internal::digit_to_val32[210 + src[2]]; - uint32_t v4 = internal::digit_to_val32[0 + src[3]]; - return v1 | v2 | v3 | v4; -} + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; -// given a code point cp, writes to c -// the utf-8 code, outputting the length in -// bytes, if the length is zero, the code point -// is invalid -// -// This can possibly be made faster using pdep -// and clz and table lookups, but JSON documents -// have few escaped code points, and the following -// function looks cheap. -// -// Note: we assume that surrogates are treated separately -// -simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { - if (cp <= 0x7F) { - c[0] = uint8_t(cp); - return 1; // ascii + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (cp <= 0x7FF) { - c[0] = uint8_t((cp >> 6) + 192); - c[1] = uint8_t((cp & 63) + 128); - return 2; // universal plane - // Surrogates are treated elsewhere... - //} //else if (0xd800 <= cp && cp <= 0xdfff) { - // return 0; // surrogates // could put assert here - } else if (cp <= 0xFFFF) { - c[0] = uint8_t((cp >> 12) + 224); - c[1] = uint8_t(((cp >> 6) & 63) + 128); - c[2] = uint8_t((cp & 63) + 128); - return 3; - } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed - c[0] = uint8_t((cp >> 18) + 240); - c[1] = uint8_t(((cp >> 12) & 63) + 128); - c[2] = uint8_t(((cp >> 6) & 63) + 128); - c[3] = uint8_t((cp & 63) + 128); - return 4; + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; } - // will return 0 when the code point was too large. - return 0; // bad r + return d; } -#if SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); } -#endif -} // namespace jsoncharutils -} // unnamed namespace -} // namespace fallback -} // namespace simdjson +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} -#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H -/* end file simdjson/generic/jsoncharutils.h for fallback */ -/* including simdjson/generic/atomparsing.h for fallback: #include "simdjson/generic/atomparsing.h" */ -/* begin file simdjson/generic/atomparsing.h for fallback */ -#ifndef SIMDJSON_GENERIC_ATOMPARSING_H +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). + static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); -#include + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } -namespace simdjson { -namespace fallback { -namespace { -/// @private -namespace atomparsing { + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); -// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. -// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot -// be certain that the character pointer will be properly aligned. -// You might think that using memcpy makes this function expensive, but you'd be wrong. -// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); -// to the compile-time constant 1936482662. -simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; -// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. -// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. -simdjson_warn_unused -simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { - uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) - static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); - std::memcpy(&srcval, src, sizeof(uint32_t)); - return srcval ^ string_to_uint32(atom); -} + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src) { - return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} + exponent += exp_neg ? 0-exp : exp; + } -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_true_atom(src); } - else if (len == 4) { return !str4ncmp(src, "true"); } - else { return false; } -} + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src) { - return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; -} + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { - if (len > 5) { return is_valid_false_atom(src); } - else if (len == 5) { return !str4ncmp(src+1, "alse"); } - else { return false; } + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; } -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src) { - return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_null_atom(src); } - else if (len == 4) { return !str4ncmp(src, "null"); } - else { return false; } + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; } -} // namespace atomparsing } // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + } // namespace fallback } // namespace simdjson -#endif // SIMDJSON_GENERIC_ATOMPARSING_H -/* end file simdjson/generic/atomparsing.h for fallback */ -/* including simdjson/generic/dom_parser_implementation.h for fallback: #include "simdjson/generic/dom_parser_implementation.h" */ -/* begin file simdjson/generic/dom_parser_implementation.h for fallback */ -#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for fallback */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for fallback: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { -// expectation: sizeof(open_container) = 64/8. -struct open_container { - uint32_t tape_index; // where, on the tape, does the scope ([,{) begins - uint32_t count; // how many elements in the scope -}; // struct open_container +// +// internal::implementation_simdjson_result_base inline implementation +// -static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - /** Tape location of each open { or [ */ - std::unique_ptr open_containers{}; - /** Whether each open container is a [ or { */ - std::unique_ptr is_array{}; - /** Buffer passed to stage 1 */ - const uint8_t *buf{}; - /** Length passed to stage 1 */ - size_t len{0}; - /** Document passed to stage 2 */ - dom::document *doc{}; +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} - inline dom_parser_implementation() noexcept; - inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; - inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} - simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; - simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; - simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; - simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; - inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; - inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; -private: - simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); +#if SIMDJSON_EXCEPTIONS -}; +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} -} // namespace fallback -} // namespace simdjson +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} -namespace simdjson { -namespace fallback { +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} -inline dom_parser_implementation::dom_parser_implementation() noexcept = default; -inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; -inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} -// Leaving these here so they can be inlined if so desired -inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } - // Stage 1 index output - size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; - structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); - if (!structural_indexes) { _capacity = 0; return MEMALLOC; } - structural_indexes[0] = 0; - n_structural_indexes = 0; +#endif // SIMDJSON_EXCEPTIONS - _capacity = capacity; - return SUCCESS; +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; } -inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - // Stage 2 stacks - open_containers.reset(new (std::nothrow) open_container[max_depth]); - is_array.reset(new (std::nothrow) bool[max_depth]); - if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} - _max_depth = max_depth; - return SUCCESS; +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); } +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + } // namespace fallback } // namespace simdjson -#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H -/* end file simdjson/generic/dom_parser_implementation.h for fallback */ -/* including simdjson/generic/implementation_simdjson_result_base.h for fallback: #include "simdjson/generic/implementation_simdjson_result_base.h" */ -/* begin file simdjson/generic/implementation_simdjson_result_base.h for fallback */ -#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H - +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for fallback */ +/* end file simdjson/generic/amalgamated.h for fallback */ +/* including simdjson/fallback/end.h: #include "simdjson/fallback/end.h" */ +/* begin file simdjson/fallback/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -namespace simdjson { -namespace fallback { - -// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair -// so we can avoid inlining errors -// TODO reconcile these! -/** - * The result of a simdjson operation that could fail. - * - * Gives the option of reading error codes, or throwing an exception by casting to the desired result. - * - * This is a base class for implementations that want to add functions to the result type for - * chaining. - * - * Override like: - * - * struct simdjson_result : public internal::implementation_simdjson_result_base { - * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} - * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} - * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} - * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} - * // Your extra methods here - * } - * - * Then any method returning simdjson_result will be chainable with your methods. - */ -template -struct implementation_simdjson_result_base { - - /** - * Create a new empty result with error = UNINITIALIZED. - */ - simdjson_inline implementation_simdjson_result_base() noexcept = default; +/* undefining SIMDJSON_IMPLEMENTATION from "fallback" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/fallback/end.h */ - /** - * Create a new error result. - */ - simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; +#endif // SIMDJSON_FALLBACK_H +/* end file simdjson/fallback.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) +/* including simdjson/haswell.h: #include "simdjson/haswell.h" */ +/* begin file simdjson/haswell.h */ +#ifndef SIMDJSON_HASWELL_H +#define SIMDJSON_HASWELL_H - /** - * Create a new successful result. - */ - simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; +/* including simdjson/haswell/begin.h: #include "simdjson/haswell/begin.h" */ +/* begin file simdjson/haswell/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "haswell" */ +#define SIMDJSON_IMPLEMENTATION haswell - /** - * Create a new result with both things (use if you don't want to branch when creating the result). - */ - simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; +/* including simdjson/haswell/base.h: #include "simdjson/haswell/base.h" */ +/* begin file simdjson/haswell/base.h */ +#ifndef SIMDJSON_HASWELL_BASE_H +#define SIMDJSON_HASWELL_BASE_H - /** - * Move the value and the error to the provided variables. - * - * @param value The variable to assign the value to. May not be set if there is an error. - * @param error The variable to assign the error to. Set to SUCCESS if there is no error. - */ - simdjson_inline void tie(T &value, error_code &error) && noexcept; +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - /** - * Move the value to the provided variable. - * - * @param value The variable to assign the value to. May not be set if there is an error. - */ - simdjson_inline error_code get(T &value) && noexcept; +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace simdjson { +/** + * Implementation for Haswell (Intel AVX2). + */ +namespace haswell { - /** - * The error. - */ - simdjson_inline error_code error() const noexcept; +class implementation; -#if SIMDJSON_EXCEPTIONS +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace - /** - * Get the result value. - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T& value() & noexcept(false); +} // namespace haswell +} // namespace simdjson - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T&& value() && noexcept(false); +#endif // SIMDJSON_HASWELL_BASE_H +/* end file simdjson/haswell/base.h */ +/* including simdjson/haswell/intrinsics.h: #include "simdjson/haswell/intrinsics.h" */ +/* begin file simdjson/haswell/intrinsics.h */ +#ifndef SIMDJSON_HASWELL_INTRINSICS_H +#define SIMDJSON_HASWELL_INTRINSICS_H - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T&& take_value() && noexcept(false); +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - /** - * Cast to the value (will throw on error). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline operator T&&() && noexcept(false); +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO -#endif // SIMDJSON_EXCEPTIONS +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline const T& value_unsafe() const& noexcept; - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline T& value_unsafe() & noexcept; - /** - * Take the result value (move it). This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline T&& value_unsafe() && noexcept; -protected: - /** users should never directly access first and second. **/ - T first{}; /** Users should never directly access 'first'. **/ - error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ -}; // struct implementation_simdjson_result_base +#endif // SIMDJSON_HASWELL_INTRINSICS_H +/* end file simdjson/haswell/intrinsics.h */ -} // namespace fallback -} // namespace simdjson +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +// We enable bmi2 only if LLVM/clang is used, because GCC may not +// make good use of it. See https://github.com/simdjson/simdjson/pull/2243 +#if defined(__clang__) +SIMDJSON_TARGET_REGION("avx2,bmi,bmi2,pclmul,lzcnt,popcnt") +#else +SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") +#endif +#endif -#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H -/* end file simdjson/generic/implementation_simdjson_result_base.h for fallback */ -/* including simdjson/generic/numberparsing.h for fallback: #include "simdjson/generic/numberparsing.h" */ -/* begin file simdjson/generic/numberparsing.h for fallback */ -#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H +/* including simdjson/haswell/bitmanipulation.h: #include "simdjson/haswell/bitmanipulation.h" */ +/* begin file simdjson/haswell/bitmanipulation.h */ +#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H +#define SIMDJSON_HASWELL_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmask.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include -#include -#include - namespace simdjson { -namespace fallback { -namespace numberparsing { +namespace haswell { +namespace { -#ifdef JSON_TEST_NUMBERS -#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) -#else -#define INVALID_NUMBER(SRC) (NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) -#endif +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} -namespace { +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} -// Convert a mantissa, an exponent and a sign bit into an ieee64 double. -// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). -// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. -simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { - double d; - mantissa &= ~(1ULL << 52); - mantissa |= real_exponent << 52; - mantissa |= ((static_cast(negative)) << 63); - std::memcpy(&d, &mantissa, sizeof(d)); - return d; +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); } -// Attempts to compute i * 10^(power) exactly; and if "negative" is -// true, negate the result. -// This function will only work in some cases, when it does not work, success is -// set to false. This should work *most of the time* (like 99% of the time). -// We assume that power is in the [smallest_power, -// largest_power] interval: the caller is responsible for this check. -simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { - // we start with a fast path - // It was described in - // Clinger WD. How to read floating point numbers accurately. - // ACM SIGPLAN Notices. 1990 -#ifndef FLT_EVAL_METHOD -#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} #endif -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - // We cannot be certain that x/y is rounded to nearest. - if (0 <= power && power <= 22 && i <= 9007199254740991) + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); #else - if (-22 <= power && power <= 22 && i <= 9007199254740991) + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); #endif - { - // convert the integer into a double. This is lossless since - // 0 <= i <= 2^53 - 1. - d = double(i); - // - // The general idea is as follows. - // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then - // 1) Both s and p can be represented exactly as 64-bit floating-point - // values - // (binary64). - // 2) Because s and p can be represented exactly as floating-point values, - // then s * p - // and s / p will produce correctly rounded values. - // - if (power < 0) { - d = d / simdjson::internal::power_of_ten[-power]; - } else { - d = d * simdjson::internal::power_of_ten[power]; - } - if (negative) { - d = -d; - } - return true; - } - // When 22 < power && power < 22 + 16, we could - // hope for another, secondary fast path. It was - // described by David M. Gay in "Correctly rounded - // binary-decimal and decimal-binary conversions." (1990) - // If you need to compute i * 10^(22 + x) for x < 16, - // first compute i * 10^x, if you know that result is exact - // (e.g., when i * 10^x < 2^53), - // then you can still proceed and do (i * 10^x) * 10^22. - // Is this worth your time? - // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) - // for this second fast path to work. - // If you you have 22 < power *and* power < 22 + 16, and then you - // optimistically compute "i * 10^(x-22)", there is still a chance that you - // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of - // this optimization maybe less common than we would like. Source: - // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ - // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html +} - // The fast path has now failed, so we are failing back on the slower path. +} // unnamed namespace +} // namespace haswell +} // namespace simdjson - // In the slow path, we need to adjust i so that it is > 1<<63 which is always - // possible, except if i == 0, so we handle i == 0 separately. - if(i == 0) { - d = negative ? -0.0 : 0.0; - return true; - } +#endif // SIMDJSON_HASWELL_BITMANIPULATION_H +/* end file simdjson/haswell/bitmanipulation.h */ +/* including simdjson/haswell/bitmask.h: #include "simdjson/haswell/bitmask.h" */ +/* begin file simdjson/haswell/bitmask.h */ +#ifndef SIMDJSON_HASWELL_BITMASK_H +#define SIMDJSON_HASWELL_BITMASK_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // The exponent is 1024 + 63 + power - // + floor(log(5**power)/log(2)). - // The 1024 comes from the ieee64 standard. - // The 63 comes from the fact that we use a 64-bit word. - // - // Computing floor(log(5**power)/log(2)) could be - // slow. Instead we use a fast function. - // - // For power in (-400,350), we have that - // (((152170 + 65536) * power ) >> 16); - // is equal to - // floor(log(5**power)/log(2)) + power when power >= 0 - // and it is equal to - // ceil(log(5**-power)/log(2)) + power when power < 0 - // - // The 65536 is (1<<16) and corresponds to - // (65536 * power) >> 16 ---> power - // - // ((152170 * power ) >> 16) is equal to - // floor(log(5**power)/log(2)) - // - // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). - // The 1<<16 value is a power of two; we could use a - // larger power of 2 if we wanted to. - // - int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; +namespace simdjson { +namespace haswell { +namespace { +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} - // We want the most significant bit of i to be 1. Shift if needed. - int lz = leading_zeroes(i); - i <<= lz; +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +#endif // SIMDJSON_HASWELL_BITMASK_H +/* end file simdjson/haswell/bitmask.h */ +/* including simdjson/haswell/numberparsing_defs.h: #include "simdjson/haswell/numberparsing_defs.h" */ +/* begin file simdjson/haswell/numberparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +#define SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H - // We are going to need to do some 64-bit arithmetic to get a precise product. - // We use a table lookup approach. - // It is safe because - // power >= smallest_power - // and power <= largest_power - // We recover the mantissa of the power, it has a leading 1. It is always - // rounded down. - // - // We want the most significant 64 bits of the product. We know - // this will be non-zero because the most significant bit of i is - // 1. - const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); - // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); - // Both i and power_of_five_128[index] have their most significant bit set to 1 which - // implies that the either the most or the second most significant bit of the product - // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason about the product: there - // is 0 or 1 leading zero in the product. +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // Unless the least significant 9 bits of the high (64-bit) part of the full - // product are all 1s, then we know that the most significant 55 bits are - // exact and no further work is needed. Having 55 bits is necessary because - // we need 53 bits for the mantissa but we have to have one rounding bit and - // we can waste a bit if the most significant bit of the product is zero. - if((firstproduct.high & 0x1FF) == 0x1FF) { - // We want to compute i * 5^q, but only care about the top 55 bits at most. - // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing - // the full computation is wasteful. So we do what is called a "truncated - // multiplication". - // We take the most significant 64-bits, and we put them in - // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q - // to the desired approximation using one multiplication. Sometimes it does not suffice. - // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and - // then we get a better approximation to i * 5^q. - // - // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat - // more complicated. - // - // There is an extra layer of complexity in that we need more than 55 bits of - // accuracy in the round-to-even scenario. - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); - firstproduct.low += secondproduct.high; - if(secondproduct.high > firstproduct.low) { firstproduct.high++; } - // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without - // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product - // is sufficiently accurate, and more computation is not needed. - } - uint64_t lower = firstproduct.low; - uint64_t upper = firstproduct.high; - // The final mantissa should be 53 bits with a leading 1. - // We shift it so that it occupies 54 bits with a leading 1. - /////// - uint64_t upperbit = upper >> 63; - uint64_t mantissa = upper >> (upperbit + 9); - lz += int(1 ^ upperbit); +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // Here we have mantissa < (1<<54). - int64_t real_exponent = exponent - lz; - if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 - if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = negative ? -0.0 : 0.0; - return true; - } - // next line is safe because -real_exponent + 1 < 0 - mantissa >>= -real_exponent + 1; - // Thankfully, we can't have both "round-to-even" and subnormals because - // "round-to-even" only occurs for powers close to 0. - mantissa += (mantissa & 1); // round up - mantissa >>= 1; - // There is a weird scenario where we don't have a subnormal but just. - // Suppose we start with 2.2250738585072013e-308, we end up - // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal - // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round - // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer - // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. - real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; - d = to_double(mantissa, real_exponent, negative); - return true; - } - // We have to round to even. The "to even" part - // is only a problem when we are right in between two floats - // which we guard against. - // If we have lots of trailing zeros, we may fall right between two - // floating-point values. - // - // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] - // times a power of two. That is, it is right between a number with binary significand - // m and another number with binary significand m+1; and it must be the case - // that it cannot be represented by a float itself. - // - // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. - // Recall that 10^q = 5^q * 2^q. - // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that - // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. - // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so - // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have - // 2^{53} x 5^{-q} < 2^{64}. - // Hence we have 5^{-q} < 2^{11}$ or q>= -4. - // - // We require lower <= 1 and not lower == 0 because we could not prove that - // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. - if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { - if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { - mantissa &= ~1; // flip it so that we do not round up - } - } +namespace simdjson { +namespace haswell { +namespace numberparsing { - mantissa += mantissa & 1; - mantissa >>= 1; +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} - // Here we have mantissa < (1<<53), unless there was an overflow - if (mantissa >= (1ULL << 53)) { - ////////// - // This will happen when parsing values such as 7.2057594037927933e+16 - //////// - mantissa = (1ULL << 52); - real_exponent++; - } - mantissa &= ~(1ULL << 52); - // we have to check that real_exponent is in range, otherwise we bail out - if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinite value!!! We could actually throw an error here if we could. - return false; - } - d = to_double(mantissa, real_exponent, negative); - return true; +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; } -// We call a fallback floating-point parser that might be slow. Note -// it will accept JSON numbers, but the JSON spec. is more restrictive so -// before you call parse_float_fallback, you need to have validated the input -// string with the JSON grammar. -// It will return an error (false) if the parsed number is infinite. -// The string parsing itself always succeeds. We know that there is at least -// one digit. -static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); - // We do not accept infinite values. +} // namespace numberparsing +} // namespace haswell +} // namespace simdjson - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} +#define SIMDJSON_SWAR_NUMBER_PARSING 1 -static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); - // We do not accept infinite values. +#endif // SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +/* end file simdjson/haswell/numberparsing_defs.h */ +/* including simdjson/haswell/simd.h: #include "simdjson/haswell/simd.h" */ +/* begin file simdjson/haswell/simd.h */ +#ifndef SIMDJSON_HASWELL_SIMD_H +#define SIMDJSON_HASWELL_SIMD_H - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// check quickly whether the next 8 chars are made of digits -// at a glance, it looks better than Mula's -// http://0x80.pl/articles/swar-digits-validate.html -simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { - uint64_t val; - // this can read up to 7 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); - std::memcpy(&val, chars, 8); - // a branchy method might be faster: - // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) - // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == - // 0x3030303030303030); - return (((val & 0xF0F0F0F0F0F0F0F0) | - (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == - 0x3333333333333333); -} +namespace simdjson { +namespace haswell { +namespace { +namespace simd { -template -SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_inline bool parse_digit(const uint8_t c, I &i) { - const uint8_t digit = static_cast(c - '0'); - if (digit > 9) { - return false; - } - // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication - i = 10 * i + digit; // might overflow, we will handle the overflow later - return true; -} + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m256i value; -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. - const uint8_t *const first_after_period = p; + // Zero constructor + simdjson_inline base() : value{__m256i()} {} -#ifdef SIMDJSON_SWAR_NUMBER_PARSING -#if SIMDJSON_SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif // SIMDJSON_SWAR_NUMBER_PARSING -#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING - // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) - if (parse_digit(*p, i)) { ++p; } - while (parse_digit(*p, i)) { p++; } - exponent = first_after_period - p; - // Decimal without digits (123.) is illegal - if (exponent == 0) { - return INVALID_NUMBER(src); - } - return SUCCESS; -} + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { - // Exp Sign: -123.456e[-]78 - bool neg_exp = ('-' == *p); - if (neg_exp || '+' == *p) { p++; } // Skip + as well + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } - // Exponent: -123.456e-[78] - auto start_exp = p; - int64_t exp_number = 0; - while (parse_digit(*p, exp_number)) { ++p; } - // It is possible for parse_digit to overflow. - // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. - // Thus we *must* check for possible overflow before we negate exp_number. + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; - // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into - // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may - // not oblige and may, in fact, generate two distinct paths in any case. It might be - // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off - // instructions for a simdjson_likely branch, an unconclusive gain. + // Forward-declared so they can be used by splat and friends. + template + struct simd8; - // If there were no digits, it's an error. - if (simdjson_unlikely(p == start_exp)) { - return INVALID_NUMBER(src); - } - // We have a valid positive exponent in exp_number at this point, except that - // it may have overflowed. + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; - // If there were more than 18 digits, we may have overflowed the integer. We have to do - // something!!!! - if (simdjson_unlikely(p > start_exp+18)) { - // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow - while (*start_exp == '0') { start_exp++; } - // 19 digits could overflow int64_t and is kind of absurd anyway. We don't - // support exponents smaller than -999,999,999,999,999,999 and bigger - // than 999,999,999,999,999,999. - // We can truncate. - // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before - // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could - // truncate at 324. - // Note that there is no reason to fail per se at this point in time. - // E.g., 0e999999999999999999999 is a fine number. - if (p > start_exp+18) { exp_number = 999999999999999999; } - } - // At this point, we know that exp_number is a sane, positive, signed integer. - // It is <= 999,999,999,999,999,999. As long as 'exponent' is in - // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' - // is bounded in magnitude by the size of the JSON input, we are fine in this universe. - // To sum it up: the next line should never overflow. - exponent += (neg_exp ? -exp_number : exp_number); - return SUCCESS; -} + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} -simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { ++start; } - // we over-decrement by one when there is a '.' - return digit_count - size_t(start - start_digits); -} - -} // unnamed namespace + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } -/** @private */ -static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { - if (parse_float_fallback(src, answer)) { - return SUCCESS; - } - return INVALID_NUMBER(src); -} + static const int SIZE = sizeof(base::value); -/** @private */ -template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon in practice. - // - // 9999999999999999999 < 2**64 so we can accommodate 19 digits. - // If we have a decimal separator, then digit_count - 1 is the number of digits, but we - // may not have a decimal separator! - if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { - // Ok, chances are good that we had an overflow! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer - // reference to it, it would force it to be stored in memory, preventing the compiler from - // picking it apart and putting into registers. i.e. if we pass it as reference, - // it gets slow. - double d; - error_code error = slow_float_parsing(src, &d); - writer.append_double(d); - return error; - } - // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other - // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 - // To future reader: we'd love if someone found a better way, or at least could explain this result! - if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { - // - // Important: smallest_power is such that it leads to a zero value. - // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero - // so something x 10^-343 goes to zero, but not so with something x 10^-342. - static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); - // - if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero - WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); - return SUCCESS; - } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. - return INVALID_NUMBER(src); + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); } - } - double d; - if (!compute_float_64(exponent, i, negative, d)) { - // we are almost never going to get here. - if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } - } - WRITE_DOUBLE(d, src, writer); - return SUCCESS; -} + }; -// for performance analysis, it is sometimes useful to skip parsing -#ifdef SIMDJSON_SKIPNUMBERPARSING + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } -template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { - writer.append_s64(0); // always write zero - return SUCCESS; // always succeeds -} + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } -#else + simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; -// parse the number at src -// define JSON_TEST_NUMBERS for unit testing -// -// It is assumed that the number is followed by a structural ({,},],[) character -// or a white space character. If that is not the case (e.g., when the JSON -// document is made of a single number), then it is necessary to copy the -// content and append a space before calling this function. -// -// Our objective is accurate parsing (ULP of 0) at high speed. -template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } + static simdjson_inline simd8 load(const T values[32]) { + return _mm256_loadu_si256(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + // Store to array + simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } - // - // Handle floats if there is a . or e (or both) - // - int64_t exponent = 0; - bool is_float = false; - if ('.' == *p) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); - digit_count = int(p - start_digits); // used later to guard against overflows - } - if (('e' == *p) || ('E' == *p)) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_exponent(src, p, exponent) ); - } - if (is_float) { - const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); - SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); - if (dirty_end) { return INVALID_NUMBER(src); } - return SUCCESS; - } + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } - // The longest negative 64-bit number is 19 digits. - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - size_t longest_digit_count = negative ? 19 : 20; - if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } - WRITE_INTEGER(~i+1, src, writer); - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } - } + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm256_shuffle_epi8(lookup_table, *this); + } - // Write unsigned if it doesn't fit in a signed integer. - if (i > uint64_t(INT64_MAX)) { - WRITE_UNSIGNED(i, src, writer); - } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); - } - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; -} + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in four steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], + thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask and so forth + shufmask = + _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, + 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop3 = BitsSetTable256mul2[mask3]; -// Inlineable functions -namespace { + // then load the corresponding mask + // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. + __m256i v256 = _mm256_castsi128_si256( + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); + __m256i compactmask = _mm256_insertf128_si256(v256, + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); + __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); + // We just need to write out the result. + // This is the tricky bit that is hard to do + // if we want to return a SIMD register, since there + // is no single-instruction approach to recombine + // the two 128-bit lanes with an offset. + __m128i v128; + v128 = _mm256_castsi256_si128(almostthere); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); + v128 = _mm256_extractf128_si256(almostthere, 1); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); + } -// This table can be used to characterize the final character of an integer -// string. For JSON structural character and allowable white space characters, -// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise -// we return NUMBER_ERROR. -// Optimization note: we could easily reduce the size of the table by half (to 128) -// at the cost of an extra branch. -// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): -static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; -const uint8_t integer_string_finisher[256] = { - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR}; + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } + }; - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } - return i; -} + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } + }; -// Parse any number from 0 to 18,446,744,073,709,551,615 -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} - return i; -} + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + this->chunks[0].compress(mask1, output); + this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); + return 64 - count_ones(mask); + } -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { - const uint8_t *p = src + 1; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + } - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } + simdjson_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - // Note: we use src[1] and not src[0] because src[0] is the quote character in this - // instance. - if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } + simdjson_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } - return i; -} + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask + ); + } -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { - // - // Check for minus sign - // - if(src == src_end) { return NUMBER_ERROR; } - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); +} // namespace simd - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } +} // unnamed namespace +} // namespace haswell +} // namespace simdjson - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} +#endif // SIMDJSON_HASWELL_SIMD_H +/* end file simdjson/haswell/simd.h */ +/* including simdjson/haswell/stringparsing_defs.h: #include "simdjson/haswell/stringparsing_defs.h" */ +/* begin file simdjson/haswell/stringparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +#define SIMDJSON_HASWELL_STRINGPARSING_DEFS_H -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += uint8_t(negative) + 1; +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = src; - uint64_t i = 0; - while (parse_digit(*src, i)) { src++; } +namespace simdjson { +namespace haswell { +namespace { - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(src - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*src)) { - // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(*src != '"') { return NUMBER_ERROR; } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits + }; } -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += uint8_t(negative); +} // unnamed namespace +} // namespace haswell +} // namespace simdjson - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } +#endif // SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +/* end file simdjson/haswell/stringparsing_defs.h */ +/* end file simdjson/haswell/begin.h */ +/* including simdjson/generic/amalgamated.h for haswell: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for haswell */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); +/* including simdjson/generic/base.h for haswell: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for haswell */ +#ifndef SIMDJSON_GENERIC_BASE_H - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = p-start_digits > 19; - } - } else { - overflow = p-src > 19; - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; +namespace simdjson { +namespace haswell { - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } +struct open_container; +class dom_parser_implementation; - exponent += exp_neg ? 0-exp : exp; - } +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } +} // namespace haswell +} // namespace simdjson - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for haswell */ +/* including simdjson/generic/jsoncharutils.h for haswell: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for haswell */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), &d)) { - return NUMBER_ERROR; - } - return d; +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; } -simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { - return (*src == '-'); +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; } -simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += uint8_t(negative); - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } - return false; +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; } -simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += uint8_t(negative); - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { - // We have an integer. - // If the number is negative and valid, it must be a signed integer. - if(negative) { return number_type::signed_integer; } - // We want values larger or equal to 9223372036854775808 to be unsigned - // integers, and the other values to be signed integers. - int digit_count = int(p - src); - if(digit_count >= 19) { - const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); - if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { - return number_type::unsigned_integer; - } - } - return number_type::signed_integer; +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii } - // Hopefully, we have 'e' or 'E' or '.'. - return number_type::floating_point_number; + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r } -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { - if(src == src_end) { return NUMBER_ERROR; } - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += uint8_t(negative); - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - if(p == src_end) { return NUMBER_ERROR; } - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while ((p != src_end) && parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely((p != src_end) && (*p == '.'))) { - p++; - const uint8_t *start_decimal_digits = p; - if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if ((p != src_end) && (*p == 'e' || *p == 'E')) { - p++; - if(p == src_end) { return NUMBER_ERROR; } - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while ((p != src_end) && parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 0-exp : exp; - } - - if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { - return NUMBER_ERROR; - } - return d; +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; } - -simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += uint8_t(negative) + 1; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = p-start_digits > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 0-exp : exp; - } - - if (*p != '"') { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), &d)) { - return NUMBER_ERROR; - } - return d; +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; } +#endif +} // namespace jsoncharutils } // unnamed namespace -#endif // SIMDJSON_SKIPNUMBERPARSING - -} // namespace numberparsing - -inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { - switch (type) { - case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; - case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; - case number_type::floating_point_number: out << "floating-point number (binary64)"; break; - default: SIMDJSON_UNREACHABLE(); - } - return out; -} - -} // namespace fallback +} // namespace haswell } // namespace simdjson -#endif // SIMDJSON_GENERIC_NUMBERPARSING_H -/* end file simdjson/generic/numberparsing.h for fallback */ - -/* including simdjson/generic/implementation_simdjson_result_base-inl.h for fallback: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for haswell */ +/* including simdjson/generic/atomparsing.h for haswell: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for haswell */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -namespace simdjson { -namespace fallback { - -// -// internal::implementation_simdjson_result_base inline implementation -// - -template -simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { - error = this->second; - if (!error) { - value = std::forward>(*this).first; - } -} +#include -template -simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { - error_code error; - std::forward>(*this).tie(value, error); - return error; -} +namespace simdjson { +namespace haswell { +namespace { +/// @private +namespace atomparsing { -template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { - return this->second; -} +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } -#if SIMDJSON_EXCEPTIONS -template -simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return this->first; +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); } -template -simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { - return std::forward>(*this).take_value(); +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } -template -simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(this->first); +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } } -template -simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { - return std::forward>(*this).take_value(); +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } -#endif // SIMDJSON_EXCEPTIONS - -template -simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { - return this->first; +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } } -template -simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { - return this->first; +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } -template -simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { - return std::forward(this->first); +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } } -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept - : first{std::forward(value)}, second{error} {} -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept - : implementation_simdjson_result_base(T{}, error) {} -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept - : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} - -} // namespace fallback +} // namespace atomparsing +} // unnamed namespace +} // namespace haswell } // namespace simdjson -#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H -/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for fallback */ -/* end file simdjson/generic/amalgamated.h for fallback */ -/* including simdjson/fallback/end.h: #include "simdjson/fallback/end.h" */ -/* begin file simdjson/fallback/end.h */ +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for haswell */ +/* including simdjson/generic/dom_parser_implementation.h for haswell: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for haswell */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -/* undefining SIMDJSON_IMPLEMENTATION from "fallback" */ -#undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/fallback/end.h */ - -#endif // SIMDJSON_FALLBACK_H -/* end file simdjson/fallback.h */ -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) -/* including simdjson/haswell.h: #include "simdjson/haswell.h" */ -/* begin file simdjson/haswell.h */ -#ifndef SIMDJSON_HASWELL_H -#define SIMDJSON_HASWELL_H +namespace simdjson { +namespace haswell { -/* including simdjson/haswell/begin.h: #include "simdjson/haswell/begin.h" */ -/* begin file simdjson/haswell/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "haswell" */ -#define SIMDJSON_IMPLEMENTATION haswell +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container -/* including simdjson/haswell/base.h: #include "simdjson/haswell/base.h" */ -/* begin file simdjson/haswell/base.h */ -#ifndef SIMDJSON_HASWELL_BASE_H -#define SIMDJSON_HASWELL_BASE_H +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL -namespace simdjson { -/** - * Implementation for Haswell (Intel AVX2). - */ -namespace haswell { + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; -class implementation; + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); -namespace { -namespace simd { -template struct simd8; -template struct simd8x64; -} // namespace simd -} // unnamed namespace +}; } // namespace haswell } // namespace simdjson -#endif // SIMDJSON_HASWELL_BASE_H -/* end file simdjson/haswell/base.h */ -/* including simdjson/haswell/intrinsics.h: #include "simdjson/haswell/intrinsics.h" */ -/* begin file simdjson/haswell/intrinsics.h */ -#ifndef SIMDJSON_HASWELL_INTRINSICS_H -#define SIMDJSON_HASWELL_INTRINSICS_H +namespace simdjson { +namespace haswell { -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; -#if SIMDJSON_VISUAL_STUDIO -// under clang within visual studio, this will include -#include // visual studio or clang -#else -#include // elsewhere -#endif // SIMDJSON_VISUAL_STUDIO +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; -#if SIMDJSON_CLANG_VISUAL_STUDIO -/** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - * e.g., if __AVX2__ is set... in turn, we normally set these - * macros by compiling against the corresponding architecture - * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole - * software with these advanced instructions. In simdjson, we - * want to compile the whole program for a generic target, - * and only target our specific kernels. As a workaround, - * we directly include the needed headers. These headers would - * normally guard against such usage, but we carefully included - * (or ) before, so the headers - * are fooled. - */ -#include // for _blsr_u64 -#include // for __lzcnt64 -#include // for most things (AVX2, AVX512, _popcnt64) -#include -#include -#include -#include -#include // for _mm_clmulepi64_si128 -// unfortunately, we may not get _blsr_u64, but, thankfully, clang -// has it as a macro. -#ifndef _blsr_u64 -// we roll our own -#define _blsr_u64(n) ((n - 1) & n) -#endif // _blsr_u64 -#endif // SIMDJSON_CLANG_VISUAL_STUDIO + _capacity = capacity; + return SUCCESS; +} -static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } -#endif // SIMDJSON_HASWELL_INTRINSICS_H -/* end file simdjson/haswell/intrinsics.h */ + _max_depth = max_depth; + return SUCCESS; +} -#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL -SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") -#endif +} // namespace haswell +} // namespace simdjson -/* including simdjson/haswell/bitmanipulation.h: #include "simdjson/haswell/bitmanipulation.h" */ -/* begin file simdjson/haswell/bitmanipulation.h */ -#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H -#define SIMDJSON_HASWELL_BITMANIPULATION_H +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for haswell */ +/* including simdjson/generic/implementation_simdjson_result_base.h for haswell: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for haswell */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmask.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { -namespace { -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -SIMDJSON_NO_SANITIZE_UNDEFINED -// This function can be used safely even if not all bytes have been -// initialized. -// See issue https://github.com/simdjson/simdjson/issues/1965 -SIMDJSON_NO_SANITIZE_MEMORY -simdjson_inline int trailing_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - return (int)_tzcnt_u64(input_num); -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - //////// - // You might expect the next line to be equivalent to - // return (int)_tzcnt_u64(input_num); - // but the generated code differs and might be less efficient? - //////// - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO -} - -/* result might be undefined when input_num is zero */ -simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return _blsr_u64(input_num); -} +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { -/* result might be undefined when input_num is zero */ -simdjson_inline int leading_zeroes(uint64_t input_num) { - return int(_lzcnt_u64(input_num)); -} + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; -#if SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows in this kernel - return __popcnt64(input_num);// Visual Studio wants two underscores -} -#else -simdjson_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); -} -#endif + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; -simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); -#else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); -#endif -} + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; -} // unnamed namespace -} // namespace haswell -} // namespace simdjson + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; -#endif // SIMDJSON_HASWELL_BITMANIPULATION_H -/* end file simdjson/haswell/bitmanipulation.h */ -/* including simdjson/haswell/bitmask.h: #include "simdjson/haswell/bitmask.h" */ -/* begin file simdjson/haswell/bitmask.h */ -#ifndef SIMDJSON_HASWELL_BITMASK_H -#define SIMDJSON_HASWELL_BITMASK_H + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; -namespace simdjson { -namespace haswell { -namespace { + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; -// -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. -// -// For example, prefix_xor(00100100) == 00011100 -// -simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { - // There should be no such thing with a processor supporting avx2 - // but not clmul. - __m128i all_ones = _mm_set1_epi8('\xFF'); - __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); - return _mm_cvtsi128_si64(result); -} +#if SIMDJSON_EXCEPTIONS -} // unnamed namespace -} // namespace haswell -} // namespace simdjson + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); -#endif // SIMDJSON_HASWELL_BITMASK_H -/* end file simdjson/haswell/bitmask.h */ -/* including simdjson/haswell/numberparsing_defs.h: #include "simdjson/haswell/numberparsing_defs.h" */ -/* begin file simdjson/haswell/numberparsing_defs.h */ -#ifndef SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H -#define SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); -namespace simdjson { -namespace haswell { -namespace numberparsing { -/** @private */ -static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - // this actually computes *16* values so we are being wasteful. - const __m128i ascii0 = _mm_set1_epi8('0'); - const __m128i mul_1_10 = - _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); - const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); - const __m128i mul_1_10000 = - _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); - const __m128i input = _mm_sub_epi8( - _mm_loadu_si128(reinterpret_cast(chars)), ascii0); - const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); - const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); - const __m128i t3 = _mm_packus_epi32(t2, t2); - const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); - return _mm_cvtsi128_si32( - t4); // only captures the sum of the first 8 digits, drop the rest -} +#endif // SIMDJSON_EXCEPTIONS -/** @private */ -simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { - internal::value128 answer; -#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); -#endif - return answer; -} + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base -} // namespace numberparsing } // namespace haswell } // namespace simdjson -#define SIMDJSON_SWAR_NUMBER_PARSING 1 - -#endif // SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H -/* end file simdjson/haswell/numberparsing_defs.h */ -/* including simdjson/haswell/simd.h: #include "simdjson/haswell/simd.h" */ -/* begin file simdjson/haswell/simd.h */ -#ifndef SIMDJSON_HASWELL_SIMD_H -#define SIMDJSON_HASWELL_SIMD_H +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for haswell */ +/* including simdjson/generic/numberparsing.h for haswell: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for haswell */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include +#include +#include + namespace simdjson { namespace haswell { -namespace { -namespace simd { - - // Forward-declared so they can be used by splat and friends. - template - struct base { - __m256i value; +namespace numberparsing { - // Zero constructor - simdjson_inline base() : value{__m256i()} {} +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif - // Conversion from SIMD register - simdjson_inline base(const __m256i _value) : value(_value) {} +namespace { - // Conversion to SIMD register - simdjson_inline operator const __m256i&() const { return this->value; } - simdjson_inline operator __m256i&() { return this->value; } +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} - // Bit operations - simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } - simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } - simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } - simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } - simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } - }; +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html - // Forward-declared so they can be used by splat and friends. - template - struct simd8; + // The fast path has now failed, so we are failing back on the slower path. - template> - struct base8: base> { - typedef uint32_t bitmask_t; - typedef uint64_t bitmask2_t; + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } - simdjson_inline base8() : base>() {} - simdjson_inline base8(const __m256i _value) : base>(_value) {} - friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximately equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; - static const int SIZE = sizeof(base::value); - template - simdjson_inline simd8 prev(const simd8 prev_chunk) const { - return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); - } - }; + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; - // SIMD byte mask type (returned by things like eq and gt) - template<> - struct simd8: base8 { - static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m256i _value) : base8(_value) {} - // Splat constructor - simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. - simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } - simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } - simdjson_inline simd8 operator~() const { return *this ^ true; } - }; + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); - template - struct base8_numeric: base8 { - static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } - static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } - static simdjson_inline simd8 load(const T values[32]) { - return _mm256_loadu_si256(reinterpret_cast(values)); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_inline simd8 repeat_16( - T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; } - - simdjson_inline base8_numeric() : base8() {} - simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} - - // Store to array - simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } - - // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } - simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } - - // Override to distinguish from bool version - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) - template - simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm256_shuffle_epi8(lookup_table, *this); - } - - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). - // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes - // get written. - // Design consideration: it seems like a function with the - // signature simd8 compress(uint32_t mask) would be - // sensible, but the AVX ISA makes this kind of approach difficult. - template - simdjson_inline void compress(uint32_t mask, L * output) const { - using internal::thintable_epi8; - using internal::BitsSetTable256mul2; - using internal::pshufb_combine_table; - // this particular implementation was inspired by work done by @animetosho - // we do it in four steps, first 8 bytes and then second 8 bytes... - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits - uint8_t mask3 = uint8_t(mask >> 16); // ... - uint8_t mask4 = uint8_t(mask >> 24); // ... - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], - thintable_epi8[mask2], thintable_epi8[mask1]); - // we increment by 0x08 the second half of the mask and so forth - shufmask = - _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, - 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); - // this is the version "nearly pruned" - __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); - // we still need to put the pieces back together. - // we compute the popcount of the first words: - int pop1 = BitsSetTable256mul2[mask1]; - int pop3 = BitsSetTable256mul2[mask3]; - - // then load the corresponding mask - // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. - __m256i v256 = _mm256_castsi128_si256( - _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); - __m256i compactmask = _mm256_insertf128_si256(v256, - _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); - __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); - // We just need to write out the result. - // This is the tricky bit that is hard to do - // if we want to return a SIMD register, since there - // is no single-instruction approach to recombine - // the two 128-bit lanes with an offset. - __m128i v128; - v128 = _mm256_castsi256_si128(almostthere); - _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); - v128 = _mm256_extractf128_si256(almostthere, 1); - _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); - } - - template - simdjson_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, replace15 - )); + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up } - }; + } - // Signed bytes - template<> - struct simd8 : base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, - int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, - int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 - ) : simd8(_mm256_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v16,v17,v18,v19,v20,v21,v22,v23, - v24,v25,v26,v27,v28,v29,v30,v31 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } + mantissa += mantissa & 1; + mantissa >>= 1; - // Order-sensitive comparisons - simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } - simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } - simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } - }; + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} - // Unsigned bytes - template<> - struct simd8: base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, - uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, - uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 - ) : simd8(_mm256_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v16,v17,v18,v19,v20,v21,v22,v23, - v24,v25,v26,v27,v28,v29,v30,v31 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. - // Saturated math - simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } - simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} - // Order-specific operations - simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. - // Bit-specific operations - simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } - simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } - simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } - simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } - simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } - template - simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } - template - simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } - // Get one of the bits and make a bitmask out of it. - // e.g. value.get_bit<7>() gets the high bit - template - simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } - }; + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} - template - struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} - simd8x64(const simd8x64& o) = delete; // no copy allowed - simd8x64& operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} - simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} - simdjson_inline uint64_t compress(uint64_t mask, T * output) const { - uint32_t mask1 = uint32_t(mask); - uint32_t mask2 = uint32_t(mask >> 32); - this->chunks[0].compress(mask1, output); - this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); - return 64 - count_ones(mask); - } +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; - simdjson_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr+sizeof(simd8)*0); - this->chunks[1].store(ptr+sizeof(simd8)*1); - } +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} - simdjson_inline uint64_t to_bitmask() const { - uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r_hi = this->chunks[1].to_bitmask(); - return r_lo | (r_hi << 32); - } +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well - simdjson_inline simd8 reduce_or() const { - return this->chunks[0] | this->chunks[1]; - } + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. - simdjson_inline simd8x64 bit_or(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] | mask, - this->chunks[1] | mask - ); - } + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. - simdjson_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] == mask, - this->chunks[1] == mask - ).to_bitmask(); - } + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. - simdjson_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64( - this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1] - ).to_bitmask(); - } + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} - simdjson_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] <= mask, - this->chunks[1] <= mask - ).to_bitmask(); - } - }; // struct simd8x64 +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} -} // namespace simd +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} } // unnamed namespace -} // namespace haswell -} // namespace simdjson - -#endif // SIMDJSON_HASWELL_SIMD_H -/* end file simdjson/haswell/simd.h */ -/* including simdjson/haswell/stringparsing_defs.h: #include "simdjson/haswell/stringparsing_defs.h" */ -/* begin file simdjson/haswell/stringparsing_defs.h */ -#ifndef SIMDJSON_HASWELL_STRINGPARSING_DEFS_H -#define SIMDJSON_HASWELL_STRINGPARSING_DEFS_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/simd.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace haswell { -namespace { - -using namespace simd; - -// Holds backslashes and quotes locations. -struct backslash_and_quote { -public: - static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - - simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } - simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } - - uint32_t bs_bits; - uint32_t quote_bits; -}; // struct backslash_and_quote -simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // this can read up to 15 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); - simd8 v(src); - // store to dest unconditionally - we can overwrite the bits we don't like later - v.store(dst); - return { - static_cast((v == '\\').to_bitmask()), // bs_bits - static_cast((v == '"').to_bitmask()), // quote_bits - }; +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); } -} // unnamed namespace -} // namespace haswell -} // namespace simdjson - -#endif // SIMDJSON_HASWELL_STRINGPARSING_DEFS_H -/* end file simdjson/haswell/stringparsing_defs.h */ -/* end file simdjson/haswell/begin.h */ -/* including simdjson/generic/amalgamated.h for haswell: #include "simdjson/generic/amalgamated.h" */ -/* begin file simdjson/generic/amalgamated.h for haswell */ -#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) -#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! -#endif - -/* including simdjson/generic/base.h for haswell: #include "simdjson/generic/base.h" */ -/* begin file simdjson/generic/base.h for haswell */ -#ifndef SIMDJSON_GENERIC_BASE_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ -/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ -/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ -/* amalgamation skipped (editor-only): #else */ -/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ -/* amalgamation skipped (editor-only): #endif */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace haswell { - -struct open_container; -class dom_parser_implementation; - -/** - * The type of a JSON number - */ -enum class number_type { - floating_point_number=1, /// a binary64 number - signed_integer, /// a signed integer that fits in a 64-bit word using two's complement - unsigned_integer /// a positive integer larger or equal to 1<<63 -}; - -} // namespace haswell -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_BASE_H -/* end file simdjson/generic/base.h for haswell */ -/* including simdjson/generic/jsoncharutils.h for haswell: #include "simdjson/generic/jsoncharutils.h" */ -/* begin file simdjson/generic/jsoncharutils.h for haswell */ -#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} -namespace simdjson { -namespace haswell { -namespace { -namespace jsoncharutils { +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); -// return non-zero if not a structural or whitespace char -// zero otherwise -simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace_negated[c]; -} +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING -simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace[c]; +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds } -// returns a value with the high 16 bits set if not valid -// otherwise returns the conversion of the 4 hex digits at src into the bottom -// 16 bits of the 32-bit return register -// -// see -// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ -static inline uint32_t hex_to_u32_nocheck( - const uint8_t *src) { // strictly speaking, static inline is a C-ism - uint32_t v1 = internal::digit_to_val32[630 + src[0]]; - uint32_t v2 = internal::digit_to_val32[420 + src[1]]; - uint32_t v3 = internal::digit_to_val32[210 + src[2]]; - uint32_t v4 = internal::digit_to_val32[0 + src[3]]; - return v1 | v2 | v3 | v4; -} +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else -// given a code point cp, writes to c -// the utf-8 code, outputting the length in -// bytes, if the length is zero, the code point -// is invalid -// -// This can possibly be made faster using pdep -// and clz and table lookups, but JSON documents -// have few escaped code points, and the following -// function looks cheap. +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing // -// Note: we assume that surrogates are treated separately +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. // -simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { - if (cp <= 0x7F) { - c[0] = uint8_t(cp); - return 1; // ascii +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows } - if (cp <= 0x7FF) { - c[0] = uint8_t((cp >> 6) + 192); - c[1] = uint8_t((cp & 63) + 128); - return 2; // universal plane - // Surrogates are treated elsewhere... - //} //else if (0xd800 <= cp && cp <= 0xdfff) { - // return 0; // surrogates // could put assert here - } else if (cp <= 0xFFFF) { - c[0] = uint8_t((cp >> 12) + 224); - c[1] = uint8_t(((cp >> 6) & 63) + 128); - c[2] = uint8_t((cp & 63) + 128); - return 3; - } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed - c[0] = uint8_t((cp >> 18) + 240); - c[1] = uint8_t(((cp >> 12) & 63) + 128); - c[2] = uint8_t(((cp >> 6) & 63) + 128); - c[3] = uint8_t((cp & 63) + 128); - return 4; + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; } - // will return 0 when the code point was too large. - return 0; // bad r -} -#if SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; -} -#endif + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } -} // namespace jsoncharutils -} // unnamed namespace -} // namespace haswell -} // namespace simdjson + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} -#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H -/* end file simdjson/generic/jsoncharutils.h for haswell */ -/* including simdjson/generic/atomparsing.h for haswell: #include "simdjson/generic/atomparsing.h" */ -/* begin file simdjson/generic/atomparsing.h for haswell */ -#ifndef SIMDJSON_GENERIC_ATOMPARSING_H +// Inlineable functions +namespace { -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); -#include +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; -namespace simdjson { -namespace haswell { -namespace { -/// @private -namespace atomparsing { +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } -// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. -// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot -// be certain that the character pointer will be properly aligned. -// You might think that using memcpy makes this function expensive, but you'd be wrong. -// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); -// to the compile-time constant 1936482662. -simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } -// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. -// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. -simdjson_warn_unused -simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { - uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) - static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); - std::memcpy(&srcval, src, sizeof(uint32_t)); - return srcval ^ string_to_uint32(atom); + return i; } -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src) { - return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_true_atom(src); } - else if (len == 4) { return !str4ncmp(src, "true"); } - else { return false; } -} +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src) { - return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; -} + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { - if (len > 5) { return is_valid_false_atom(src); } - else if (len == 5) { return !str4ncmp(src+1, "alse"); } - else { return false; } -} + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src) { - return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; + return i; } -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_null_atom(src); } - else if (len == 4) { return !str4ncmp(src, "null"); } - else { return false; } -} - -} // namespace atomparsing -} // unnamed namespace -} // namespace haswell -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ATOMPARSING_H -/* end file simdjson/generic/atomparsing.h for haswell */ -/* including simdjson/generic/dom_parser_implementation.h for haswell: #include "simdjson/generic/dom_parser_implementation.h" */ -/* begin file simdjson/generic/dom_parser_implementation.h for haswell */ -#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } -namespace simdjson { -namespace haswell { + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } -// expectation: sizeof(open_container) = 64/8. -struct open_container { - uint32_t tape_index; // where, on the tape, does the scope ([,{) begins - uint32_t count; // how many elements in the scope -}; // struct open_container + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } -static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + return i; +} -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - /** Tape location of each open { or [ */ - std::unique_ptr open_containers{}; - /** Whether each open container is a [ or { */ - std::unique_ptr is_array{}; - /** Buffer passed to stage 1 */ - const uint8_t *buf{}; - /** Length passed to stage 1 */ - size_t len{0}; - /** Document passed to stage 2 */ - dom::document *doc{}; +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); - inline dom_parser_implementation() noexcept; - inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; - inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } - simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; - simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; - simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; - simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; - inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; - inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; -private: - simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} -}; +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); -} // namespace haswell -} // namespace simdjson + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } -namespace simdjson { -namespace haswell { + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} -inline dom_parser_implementation::dom_parser_implementation() noexcept = default; -inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; -inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; -// Leaving these here so they can be inlined if so desired -inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } - // Stage 1 index output - size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; - structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); - if (!structural_indexes) { _capacity = 0; return MEMALLOC; } - structural_indexes[0] = 0; - n_structural_indexes = 0; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } - _capacity = capacity; - return SUCCESS; + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; } -inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - // Stage 2 stacks - open_containers.reset(new (std::nothrow) open_container[max_depth]); - is_array.reset(new (std::nothrow) bool[max_depth]); - if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); - _max_depth = max_depth; - return SUCCESS; -} + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } -} // namespace haswell -} // namespace simdjson + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); -#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H -/* end file simdjson/generic/dom_parser_implementation.h for haswell */ -/* including simdjson/generic/implementation_simdjson_result_base.h for haswell: #include "simdjson/generic/implementation_simdjson_result_base.h" */ -/* begin file simdjson/generic/implementation_simdjson_result_base.h for haswell */ -#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; -namespace simdjson { -namespace haswell { + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } -// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair -// so we can avoid inlining errors -// TODO reconcile these! -/** - * The result of a simdjson operation that could fail. - * - * Gives the option of reading error codes, or throwing an exception by casting to the desired result. - * - * This is a base class for implementations that want to add functions to the result type for - * chaining. - * - * Override like: - * - * struct simdjson_result : public internal::implementation_simdjson_result_base { - * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} - * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} - * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} - * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} - * // Your extra methods here - * } - * - * Then any method returning simdjson_result will be chainable with your methods. - */ -template -struct implementation_simdjson_result_base { + exponent += exp_neg ? 0-exp : exp; + } - /** - * Create a new empty result with error = UNINITIALIZED. - */ - simdjson_inline implementation_simdjson_result_base() noexcept = default; + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - /** - * Create a new error result. - */ - simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - /** - * Create a new successful result. - */ - simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} - /** - * Create a new result with both things (use if you don't want to branch when creating the result). - */ - simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} - /** - * Move the value and the error to the provided variables. - * - * @param value The variable to assign the value to. May not be set if there is an error. - * @param error The variable to assign the error to. Set to SUCCESS if there is no error. - */ - simdjson_inline void tie(T &value, error_code &error) && noexcept; +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} - /** - * Move the value to the provided variable. - * - * @param value The variable to assign the value to. May not be set if there is an error. - */ - simdjson_inline error_code get(T &value) && noexcept; +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). + static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} - /** - * The error. - */ - simdjson_inline error_code error() const noexcept; +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); -#if SIMDJSON_EXCEPTIONS + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - /** - * Get the result value. - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T& value() & noexcept(false); + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T&& value() && noexcept(false); + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T&& take_value() && noexcept(false); + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; - /** - * Cast to the value (will throw on error). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline operator T&&() && noexcept(false); + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + exponent += exp_neg ? 0-exp : exp; + } -#endif // SIMDJSON_EXCEPTIONS + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline const T& value_unsafe() const& noexcept; - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline T& value_unsafe() & noexcept; - /** - * Take the result value (move it). This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline T&& value_unsafe() && noexcept; -protected: - /** users should never directly access first and second. **/ - T first{}; /** Users should never directly access 'first'. **/ - error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ -}; // struct implementation_simdjson_result_base - -} // namespace haswell -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H -/* end file simdjson/generic/implementation_simdjson_result_base.h for haswell */ -/* including simdjson/generic/numberparsing.h for haswell: #include "simdjson/generic/numberparsing.h" */ -/* begin file simdjson/generic/numberparsing.h for haswell */ -#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -#include -#include -#include - -namespace simdjson { -namespace haswell { -namespace numberparsing { - -#ifdef JSON_TEST_NUMBERS -#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) -#else -#define INVALID_NUMBER(SRC) (NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) -#endif - -namespace { - -// Convert a mantissa, an exponent and a sign bit into an ieee64 double. -// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). -// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. -simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { - double d; - mantissa &= ~(1ULL << 52); - mantissa |= real_exponent << 52; - mantissa |= ((static_cast(negative)) << 63); - std::memcpy(&d, &mantissa, sizeof(d)); - return d; -} + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; -// Attempts to compute i * 10^(power) exactly; and if "negative" is -// true, negate the result. -// This function will only work in some cases, when it does not work, success is -// set to false. This should work *most of the time* (like 99% of the time). -// We assume that power is in the [smallest_power, -// largest_power] interval: the caller is responsible for this check. -simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { - // we start with a fast path - // It was described in - // Clinger WD. How to read floating point numbers accurately. - // ACM SIGPLAN Notices. 1990 -#ifndef FLT_EVAL_METHOD -#error "FLT_EVAL_METHOD should be defined, please include cfloat." -#endif -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - // We cannot be certain that x/y is rounded to nearest. - if (0 <= power && power <= 22 && i <= 9007199254740991) -#else - if (-22 <= power && power <= 22 && i <= 9007199254740991) -#endif - { - // convert the integer into a double. This is lossless since - // 0 <= i <= 2^53 - 1. - d = double(i); - // - // The general idea is as follows. - // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then - // 1) Both s and p can be represented exactly as 64-bit floating-point - // values - // (binary64). - // 2) Because s and p can be represented exactly as floating-point values, - // then s * p - // and s / p will produce correctly rounded values. - // - if (power < 0) { - d = d / simdjson::internal::power_of_ten[-power]; - } else { - d = d * simdjson::internal::power_of_ten[power]; - } - if (negative) { - d = -d; - } - return true; + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } } - // When 22 < power && power < 22 + 16, we could - // hope for another, secondary fast path. It was - // described by David M. Gay in "Correctly rounded - // binary-decimal and decimal-binary conversions." (1990) - // If you need to compute i * 10^(22 + x) for x < 16, - // first compute i * 10^x, if you know that result is exact - // (e.g., when i * 10^x < 2^53), - // then you can still proceed and do (i * 10^x) * 10^22. - // Is this worth your time? - // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) - // for this second fast path to work. - // If you you have 22 < power *and* power < 22 + 16, and then you - // optimistically compute "i * 10^(x-22)", there is still a chance that you - // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of - // this optimization maybe less common than we would like. Source: - // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ - // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html - - // The fast path has now failed, so we are failing back on the slower path. - - // In the slow path, we need to adjust i so that it is > 1<<63 which is always - // possible, except if i == 0, so we handle i == 0 separately. - if(i == 0) { - d = negative ? -0.0 : 0.0; - return true; + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; } + return d; +} - - // The exponent is 1024 + 63 + power - // + floor(log(5**power)/log(2)). - // The 1024 comes from the ieee64 standard. - // The 63 comes from the fact that we use a 64-bit word. - // - // Computing floor(log(5**power)/log(2)) could be - // slow. Instead we use a fast function. - // - // For power in (-400,350), we have that - // (((152170 + 65536) * power ) >> 16); - // is equal to - // floor(log(5**power)/log(2)) + power when power >= 0 - // and it is equal to - // ceil(log(5**-power)/log(2)) + power when power < 0 +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { // - // The 65536 is (1<<16) and corresponds to - // (65536 * power) >> 16 ---> power + // Check for minus sign // - // ((152170 * power ) >> 16) is equal to - // floor(log(5**power)/log(2)) + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + // - // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). - // The 1<<16 value is a power of two; we could use a - // larger power of 2 if we wanted to. + // Parse the integer part. // - int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; - - - // We want the most significant bit of i to be 1. Shift if needed. - int lz = leading_zeroes(i); - i <<= lz; - + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - // We are going to need to do some 64-bit arithmetic to get a precise product. - // We use a table lookup approach. - // It is safe because - // power >= smallest_power - // and power <= largest_power - // We recover the mantissa of the power, it has a leading 1. It is always - // rounded down. // - // We want the most significant 64 bits of the product. We know - // this will be non-zero because the most significant bit of i is - // 1. - const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); - // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // Parse the decimal part. // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); - // Both i and power_of_five_128[index] have their most significant bit set to 1 which - // implies that the either the most or the second most significant bit of the product - // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason about the product: there - // is 0 or 1 leading zero in the product. - - // Unless the least significant 9 bits of the high (64-bit) part of the full - // product are all 1s, then we know that the most significant 55 bits are - // exact and no further work is needed. Having 55 bits is necessary because - // we need 53 bits for the mantissa but we have to have one rounding bit and - // we can waste a bit if the most significant bit of the product is zero. - if((firstproduct.high & 0x1FF) == 0x1FF) { - // We want to compute i * 5^q, but only care about the top 55 bits at most. - // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing - // the full computation is wasteful. So we do what is called a "truncated - // multiplication". - // We take the most significant 64-bits, and we put them in - // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q - // to the desired approximation using one multiplication. Sometimes it does not suffice. - // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and - // then we get a better approximation to i * 5^q. - // - // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat - // more complicated. - // - // There is an extra layer of complexity in that we need more than 55 bits of - // accuracy in the round-to-even scenario. - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); - firstproduct.low += secondproduct.high; - if(secondproduct.high > firstproduct.low) { firstproduct.high++; } - // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without - // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product - // is sufficiently accurate, and more computation is not needed. - } - uint64_t lower = firstproduct.low; - uint64_t upper = firstproduct.high; - // The final mantissa should be 53 bits with a leading 1. - // We shift it so that it occupies 54 bits with a leading 1. - /////// - uint64_t upperbit = upper >> 63; - uint64_t mantissa = upper >> (upperbit + 9); - lz += int(1 ^ upperbit); + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); - // Here we have mantissa < (1<<54). - int64_t real_exponent = exponent - lz; - if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 - if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = negative ? -0.0 : 0.0; - return true; + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; } - // next line is safe because -real_exponent + 1 < 0 - mantissa >>= -real_exponent + 1; - // Thankfully, we can't have both "round-to-even" and subnormals because - // "round-to-even" only occurs for powers close to 0. - mantissa += (mantissa & 1); // round up - mantissa >>= 1; - // There is a weird scenario where we don't have a subnormal but just. - // Suppose we start with 2.2250738585072013e-308, we end up - // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal - // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round - // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer - // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. - real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; - d = to_double(mantissa, real_exponent, negative); - return true; + } else { + overflow = p-src > 19; } - // We have to round to even. The "to even" part - // is only a problem when we are right in between two floats - // which we guard against. - // If we have lots of trailing zeros, we may fall right between two - // floating-point values. - // - // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] - // times a power of two. That is, it is right between a number with binary significand - // m and another number with binary significand m+1; and it must be the case - // that it cannot be represented by a float itself. + // - // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. - // Recall that 10^q = 5^q * 2^q. - // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that - // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. - // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so - // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have - // 2^{53} x 5^{-q} < 2^{64}. - // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // Parse the exponent // - // We require lower <= 1 and not lower == 0 because we could not prove that - // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. - if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { - if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { - mantissa &= ~1; // flip it so that we do not round up - } + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; } - mantissa += mantissa & 1; - mantissa >>= 1; + if (*p != '"') { return NUMBER_ERROR; } - // Here we have mantissa < (1<<53), unless there was an overflow - if (mantissa >= (1ULL << 53)) { - ////////// - // This will happen when parsing values such as 7.2057594037927933e+16 - //////// - mantissa = (1ULL << 52); - real_exponent++; + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } } - mantissa &= ~(1ULL << 52); - // we have to check that real_exponent is in range, otherwise we bail out - if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinite value!!! We could actually throw an error here if we could. - return false; + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; } - d = to_double(mantissa, real_exponent, negative); - return true; + return d; } -// We call a fallback floating-point parser that might be slow. Note -// it will accept JSON numbers, but the JSON spec. is more restrictive so -// before you call parse_float_fallback, you need to have validated the input -// string with the JSON grammar. -// It will return an error (false) if the parsed number is infinite. -// The string parsing itself always succeeds. We know that there is at least -// one digit. -static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); - // We do not accept infinite values. +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; } -static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); - // We do not accept infinite values. +} // namespace haswell +} // namespace simdjson - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for haswell */ -// check quickly whether the next 8 chars are made of digits -// at a glance, it looks better than Mula's -// http://0x80.pl/articles/swar-digits-validate.html -simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { - uint64_t val; - // this can read up to 7 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); - std::memcpy(&val, chars, 8); - // a branchy method might be faster: - // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) - // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == - // 0x3030303030303030); - return (((val & 0xF0F0F0F0F0F0F0F0) | - (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == - 0x3333333333333333); -} +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for haswell: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H -template -SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_inline bool parse_digit(const uint8_t c, I &i) { - const uint8_t digit = static_cast(c - '0'); - if (digit > 9) { - return false; +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; } - // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication - i = 10 * i + digit; // might overflow, we will handle the overflow later - return true; } -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. - const uint8_t *const first_after_period = p; +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} -#ifdef SIMDJSON_SWAR_NUMBER_PARSING -#if SIMDJSON_SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif // SIMDJSON_SWAR_NUMBER_PARSING -#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING - // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) - if (parse_digit(*p, i)) { ++p; } - while (parse_digit(*p, i)) { p++; } - exponent = first_after_period - p; - // Decimal without digits (123.) is illegal - if (exponent == 0) { - return INVALID_NUMBER(src); - } - return SUCCESS; +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; } -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { - // Exp Sign: -123.456e[-]78 - bool neg_exp = ('-' == *p); - if (neg_exp || '+' == *p) { p++; } // Skip + as well +#if SIMDJSON_EXCEPTIONS - // Exponent: -123.456e-[78] - auto start_exp = p; - int64_t exp_number = 0; - while (parse_digit(*p, exp_number)) { ++p; } - // It is possible for parse_digit to overflow. - // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. - // Thus we *must* check for possible overflow before we negate exp_number. +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} - // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into - // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may - // not oblige and may, in fact, generate two distinct paths in any case. It might be - // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off - // instructions for a simdjson_likely branch, an unconclusive gain. +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} - // If there were no digits, it's an error. - if (simdjson_unlikely(p == start_exp)) { - return INVALID_NUMBER(src); - } - // We have a valid positive exponent in exp_number at this point, except that - // it may have overflowed. - - // If there were more than 18 digits, we may have overflowed the integer. We have to do - // something!!!! - if (simdjson_unlikely(p > start_exp+18)) { - // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow - while (*start_exp == '0') { start_exp++; } - // 19 digits could overflow int64_t and is kind of absurd anyway. We don't - // support exponents smaller than -999,999,999,999,999,999 and bigger - // than 999,999,999,999,999,999. - // We can truncate. - // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before - // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could - // truncate at 324. - // Note that there is no reason to fail per se at this point in time. - // E.g., 0e999999999999999999999 is a fine number. - if (p > start_exp+18) { exp_number = 999999999999999999; } - } - // At this point, we know that exp_number is a sane, positive, signed integer. - // It is <= 999,999,999,999,999,999. As long as 'exponent' is in - // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' - // is bounded in magnitude by the size of the JSON input, we are fine in this universe. - // To sum it up: the next line should never overflow. - exponent += (neg_exp ? -exp_number : exp_number); - return SUCCESS; +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); } -simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { ++start; } - // we over-decrement by one when there is a '.' - return digit_count - size_t(start - start_digits); +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); } -} // unnamed namespace +#endif // SIMDJSON_EXCEPTIONS -/** @private */ -static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { - if (parse_float_fallback(src, answer)) { - return SUCCESS; - } - return INVALID_NUMBER(src); +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; } -/** @private */ -template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon in practice. - // - // 9999999999999999999 < 2**64 so we can accommodate 19 digits. - // If we have a decimal separator, then digit_count - 1 is the number of digits, but we - // may not have a decimal separator! - if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { - // Ok, chances are good that we had an overflow! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer - // reference to it, it would force it to be stored in memory, preventing the compiler from - // picking it apart and putting into registers. i.e. if we pass it as reference, - // it gets slow. - double d; - error_code error = slow_float_parsing(src, &d); - writer.append_double(d); - return error; - } - // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other - // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 - // To future reader: we'd love if someone found a better way, or at least could explain this result! - if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { - // - // Important: smallest_power is such that it leads to a zero value. - // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero - // so something x 10^-343 goes to zero, but not so with something x 10^-342. - static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); - // - if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero - WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); - return SUCCESS; - } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. - return INVALID_NUMBER(src); - } - } - double d; - if (!compute_float_64(exponent, i, negative, d)) { - // we are almost never going to get here. - if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } - } - WRITE_DOUBLE(d, src, writer); - return SUCCESS; +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; } -// for performance analysis, it is sometimes useful to skip parsing -#ifdef SIMDJSON_SKIPNUMBERPARSING - -template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { - writer.append_s64(0); // always write zero - return SUCCESS; // always succeeds +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); } -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } -#else +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} -// parse the number at src -// define JSON_TEST_NUMBERS for unit testing -// -// It is assumed that the number is followed by a structural ({,},],[) character -// or a white space character. If that is not the case (e.g., when the JSON -// document is made of a single number), then it is necessary to copy the -// content and append a space before calling this function. -// -// Our objective is accurate parsing (ULP of 0) at high speed. -template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +} // namespace haswell +} // namespace simdjson - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for haswell */ +/* end file simdjson/generic/amalgamated.h for haswell */ +/* including simdjson/haswell/end.h: #include "simdjson/haswell/end.h" */ +/* begin file simdjson/haswell/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_UNTARGET_REGION +#endif - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } +/* undefining SIMDJSON_IMPLEMENTATION from "haswell" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/haswell/end.h */ - // - // Handle floats if there is a . or e (or both) - // - int64_t exponent = 0; - bool is_float = false; - if ('.' == *p) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); - digit_count = int(p - start_digits); // used later to guard against overflows - } - if (('e' == *p) || ('E' == *p)) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_exponent(src, p, exponent) ); - } - if (is_float) { - const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); - SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); - if (dirty_end) { return INVALID_NUMBER(src); } - return SUCCESS; - } +#endif // SIMDJSON_HASWELL_H +/* end file simdjson/haswell.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) +/* including simdjson/icelake.h: #include "simdjson/icelake.h" */ +/* begin file simdjson/icelake.h */ +#ifndef SIMDJSON_ICELAKE_H +#define SIMDJSON_ICELAKE_H - // The longest negative 64-bit number is 19 digits. - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - size_t longest_digit_count = negative ? 19 : 20; - if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } - WRITE_INTEGER(~i+1, src, writer); - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } - } +/* including simdjson/icelake/begin.h: #include "simdjson/icelake/begin.h" */ +/* begin file simdjson/icelake/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "icelake" */ +#define SIMDJSON_IMPLEMENTATION icelake +/* including simdjson/icelake/base.h: #include "simdjson/icelake/base.h" */ +/* begin file simdjson/icelake/base.h */ +#ifndef SIMDJSON_ICELAKE_BASE_H +#define SIMDJSON_ICELAKE_BASE_H - // Write unsigned if it doesn't fit in a signed integer. - if (i > uint64_t(INT64_MAX)) { - WRITE_UNSIGNED(i, src, writer); - } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); - } - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// Inlineable functions -namespace { +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +/** + * Implementation for Icelake (Intel AVX512). + */ +namespace icelake { -// This table can be used to characterize the final character of an integer -// string. For JSON structural character and allowable white space characters, -// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise -// we return NUMBER_ERROR. -// Optimization note: we could easily reduce the size of the table by half (to 128) -// at the cost of an extra branch. -// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): -static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); +class implementation; -const uint8_t integer_string_finisher[256] = { - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR}; +} // namespace icelake +} // namespace simdjson -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +#endif // SIMDJSON_ICELAKE_BASE_H +/* end file simdjson/icelake/base.h */ +/* including simdjson/icelake/intrinsics.h: #include "simdjson/icelake/intrinsics.h" */ +/* begin file simdjson/icelake/intrinsics.h */ +#ifndef SIMDJSON_ICELAKE_INTRINSICS_H +#define SIMDJSON_ICELAKE_INTRINSICS_H - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO - return i; -} +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// Important: we need the AVX-512 headers: +#include +#include +#include +#include +#include +#include +#include +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO +static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); -// Parse any number from 0 to 18,446,744,073,709,551,615 -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } +#endif // SIMDJSON_ICELAKE_INTRINSICS_H +/* end file simdjson/icelake/intrinsics.h */ - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt,popcnt") +#endif - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } +/* including simdjson/icelake/bitmanipulation.h: #include "simdjson/icelake/bitmanipulation.h" */ +/* begin file simdjson/icelake/bitmanipulation.h */ +#ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H +#define SIMDJSON_ICELAKE_BITMANIPULATION_H - return i; -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { - const uint8_t *p = src + 1; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +namespace simdjson { +namespace icelake { +namespace { - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } - - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - // Note: we use src[1] and not src[0] because src[0] is the quote character in this - // instance. - if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } - - return i; +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO } -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); } +#endif -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { - // - // Check for minus sign - // - if(src == src_end) { return NUMBER_ERROR; } - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } +} // unnamed namespace +} // namespace icelake +} // namespace simdjson - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} +#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H +/* end file simdjson/icelake/bitmanipulation.h */ +/* including simdjson/icelake/bitmask.h: #include "simdjson/icelake/bitmask.h" */ +/* begin file simdjson/icelake/bitmask.h */ +#ifndef SIMDJSON_ICELAKE_BITMASK_H +#define SIMDJSON_ICELAKE_BITMASK_H -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += uint8_t(negative) + 1; +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = src; - uint64_t i = 0; - while (parse_digit(*src, i)) { src++; } +namespace simdjson { +namespace icelake { +namespace { - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(src - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*src)) { - // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(*src != '"') { return NUMBER_ERROR; } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); } -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += uint8_t(negative); +} // unnamed namespace +} // namespace icelake +} // namespace simdjson - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } +#endif // SIMDJSON_ICELAKE_BITMASK_H +/* end file simdjson/icelake/bitmask.h */ +/* including simdjson/icelake/simd.h: #include "simdjson/icelake/simd.h" */ +/* begin file simdjson/icelake/simd.h */ +#ifndef SIMDJSON_ICELAKE_SIMD_H +#define SIMDJSON_ICELAKE_SIMD_H - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = p-start_digits > 19; - } - } else { - overflow = p-src > 19; - } +#if defined(__GNUC__) && !defined(__clang__) +#if __GNUC__ == 8 +#define SIMDJSON_GCC8 1 +#endif // __GNUC__ == 8 +#endif // defined(__GNUC__) && !defined(__clang__) - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; +#if SIMDJSON_GCC8 +/** + * GCC 8 fails to provide _mm512_set_epi8. We roll our own. + */ +inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { + return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), + uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), + uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), + uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), + uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), + uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), + uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), + uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); +} +#endif // SIMDJSON_GCC8 - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - exponent += exp_neg ? 0-exp : exp; - } - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } +namespace simdjson { +namespace icelake { +namespace { +namespace simd { - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m512i value; - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), &d)) { - return NUMBER_ERROR; - } - return d; -} + // Zero constructor + simdjson_inline base() : value{__m512i()} {} -simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { - return (*src == '-'); -} + // Conversion from SIMD register + simdjson_inline base(const __m512i _value) : value(_value) {} -simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += uint8_t(negative); - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } - return false; -} + // Conversion to SIMD register + simdjson_inline operator const __m512i&() const { return this->value; } + simdjson_inline operator __m512i&() { return this->value; } -simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += uint8_t(negative); - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { - // We have an integer. - // If the number is negative and valid, it must be a signed integer. - if(negative) { return number_type::signed_integer; } - // We want values larger or equal to 9223372036854775808 to be unsigned - // integers, and the other values to be signed integers. - int digit_count = int(p - src); - if(digit_count >= 19) { - const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); - if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { - return number_type::unsigned_integer; - } - } - return number_type::signed_integer; - } - // Hopefully, we have 'e' or 'E' or '.'. - return number_type::floating_point_number; -} + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { - if(src == src_end) { return NUMBER_ERROR; } - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += uint8_t(negative); + // Forward-declared so they can be used by splat and friends. + template + struct simd8; - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - if(p == src_end) { return NUMBER_ERROR; } - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while ((p != src_end) && parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely((p != src_end) && (*p == '.'))) { - p++; - const uint8_t *start_decimal_digits = p; - if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m512i _value) : base>(_value) {} - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; + friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { + return _mm512_cmpeq_epi8_mask(lhs, rhs); } - } else { - overflow = p-src > 19; - } - // - // Parse the exponent - // - if ((p != src_end) && (*p == 'e' || *p == 'E')) { - p++; - if(p == src_end) { return NUMBER_ERROR; } - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; + static const int SIZE = sizeof(base::value); - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while ((p != src_end) && parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) + constexpr int shift = 16 - N; + return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); + } + }; - exponent += exp_neg ? 0-exp : exp; - } + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } - if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m512i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } + static simdjson_inline simd8 load(const T values[64]) { + return _mm512_loadu_si512(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { - return NUMBER_ERROR; - } - return d; -} + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m512i _value) : base8(_value) {} -simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += uint8_t(negative) + 1; + // Store to array + simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = p-start_digits > 19; + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm512_shuffle_epi8(lookup_table, *this); } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 0-exp : exp; - } - - if (*p != '"') { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), &d)) { - return NUMBER_ERROR; - } - return d; -} + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint64_t mask, L * output) const { + // we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability + // (AMD Zen4 has terrible performance with it, it is effectively broken) + // _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + __m512i compressed = _mm512_maskz_compress_epi8(~mask, *this); + _mm512_storeu_si512(output, compressed); // could use a mask + } -} // unnamed namespace -#endif // SIMDJSON_SKIPNUMBERPARSING + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; -} // namespace numberparsing + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, + int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, + int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, + int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, + int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} -inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { - switch (type) { - case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; - case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; - case number_type::floating_point_number: out << "floating-point number (binary64)"; break; - default: SIMDJSON_UNREACHABLE(); + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); } - return out; -} -} // namespace haswell -} // namespace simdjson + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } -#endif // SIMDJSON_GENERIC_NUMBERPARSING_H -/* end file simdjson/generic/numberparsing.h for haswell */ + simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } + }; -/* including simdjson/generic/implementation_simdjson_result_base-inl.h for haswell: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for haswell */ -#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, + uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, + uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, + uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, + uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -namespace simdjson { -namespace haswell { + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } -// -// internal::implementation_simdjson_result_base inline implementation -// + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } -template -simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { - error = this->second; - if (!error) { - value = std::forward>(*this).first; - } -} + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } -template -simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { - error_code error; - std::forward>(*this).tie(value, error); - return error; -} + simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { + return !_mm512_test_epi8_mask(*this, *this); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } + }; -template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { - return this->second; -} + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; -#if SIMDJSON_EXCEPTIONS + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed -template -simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return this->first; -} + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} -template -simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { - return std::forward>(*this).take_value(); -} + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(mask, output); + return 64 - count_ones(mask); + } -template -simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(this->first); -} + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + } -template -simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { - return std::forward>(*this).take_value(); -} + simdjson_inline simd8 reduce_or() const { + return this->chunks[0]; + } -#endif // SIMDJSON_EXCEPTIONS + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask + ); + } -template -simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { - return this->first; -} + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] == mask; + } -template -simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { - return this->first; -} + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return this->chunks[0] == other.chunks[0]; + } -template -simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { - return std::forward(this->first); -} + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] <= mask; + } + }; // struct simd8x64 -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept - : first{std::forward(value)}, second{error} {} -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept - : implementation_simdjson_result_base(T{}, error) {} -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept - : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} +} // namespace simd -} // namespace haswell +} // unnamed namespace +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H -/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for haswell */ -/* end file simdjson/generic/amalgamated.h for haswell */ -/* including simdjson/haswell/end.h: #include "simdjson/haswell/end.h" */ -/* begin file simdjson/haswell/end.h */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL -SIMDJSON_UNTARGET_REGION -#endif - -/* undefining SIMDJSON_IMPLEMENTATION from "haswell" */ -#undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/haswell/end.h */ - -#endif // SIMDJSON_HASWELL_H -/* end file simdjson/haswell.h */ -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) -/* including simdjson/icelake.h: #include "simdjson/icelake.h" */ -/* begin file simdjson/icelake.h */ -#ifndef SIMDJSON_ICELAKE_H -#define SIMDJSON_ICELAKE_H - -/* including simdjson/icelake/begin.h: #include "simdjson/icelake/begin.h" */ -/* begin file simdjson/icelake/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "icelake" */ -#define SIMDJSON_IMPLEMENTATION icelake -/* including simdjson/icelake/base.h: #include "simdjson/icelake/base.h" */ -/* begin file simdjson/icelake/base.h */ -#ifndef SIMDJSON_ICELAKE_BASE_H -#define SIMDJSON_ICELAKE_BASE_H +#endif // SIMDJSON_ICELAKE_SIMD_H +/* end file simdjson/icelake/simd.h */ +/* including simdjson/icelake/stringparsing_defs.h: #include "simdjson/icelake/stringparsing_defs.h" */ +/* begin file simdjson/icelake/stringparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +#define SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE namespace simdjson { -/** - * Implementation for Icelake (Intel AVX512). - */ namespace icelake { +namespace { -class implementation; - -} // namespace icelake -} // namespace simdjson - -#endif // SIMDJSON_ICELAKE_BASE_H -/* end file simdjson/icelake/base.h */ -/* including simdjson/icelake/intrinsics.h: #include "simdjson/icelake/intrinsics.h" */ -/* begin file simdjson/icelake/intrinsics.h */ -#ifndef SIMDJSON_ICELAKE_INTRINSICS_H -#define SIMDJSON_ICELAKE_INTRINSICS_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +using namespace simd; -#if SIMDJSON_VISUAL_STUDIO -// under clang within visual studio, this will include -#include // visual studio or clang -#else -#include // elsewhere -#endif // SIMDJSON_VISUAL_STUDIO +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 64; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); -#if SIMDJSON_CLANG_VISUAL_STUDIO -/** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - * e.g., if __AVX2__ is set... in turn, we normally set these - * macros by compiling against the corresponding architecture - * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole - * software with these advanced instructions. In simdjson, we - * want to compile the whole program for a generic target, - * and only target our specific kernels. As a workaround, - * we directly include the needed headers. These headers would - * normally guard against such usage, but we carefully included - * (or ) before, so the headers - * are fooled. - */ -#include // for _blsr_u64 -#include // for __lzcnt64 -#include // for most things (AVX2, AVX512, _popcnt64) -#include -#include -#include -#include -#include // for _mm_clmulepi64_si128 -// Important: we need the AVX-512 headers: -#include -#include -#include -#include -#include -#include -#include -// unfortunately, we may not get _blsr_u64, but, thankfully, clang -// has it as a macro. -#ifndef _blsr_u64 -// we roll our own -#define _blsr_u64(n) ((n - 1) & n) -#endif // _blsr_u64 -#endif // SIMDJSON_CLANG_VISUAL_STUDIO + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } -static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); + uint64_t bs_bits; + uint64_t quote_bits; +}; // struct backslash_and_quote -#endif // SIMDJSON_ICELAKE_INTRINSICS_H -/* end file simdjson/icelake/intrinsics.h */ +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast(v == '\\'), // bs_bits + static_cast(v == '"'), // quote_bits + }; +} -#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE -SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt,popcnt") -#endif +} // unnamed namespace +} // namespace icelake +} // namespace simdjson -/* including simdjson/icelake/bitmanipulation.h: #include "simdjson/icelake/bitmanipulation.h" */ -/* begin file simdjson/icelake/bitmanipulation.h */ -#ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H -#define SIMDJSON_ICELAKE_BITMANIPULATION_H +#endif // SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +/* end file simdjson/icelake/stringparsing_defs.h */ +/* including simdjson/icelake/numberparsing_defs.h: #include "simdjson/icelake/numberparsing_defs.h" */ +/* begin file simdjson/icelake/numberparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +#define SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { -namespace { +namespace numberparsing { -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -SIMDJSON_NO_SANITIZE_UNDEFINED -// This function can be used safely even if not all bytes have been -// initialized. -// See issue https://github.com/simdjson/simdjson/issues/1965 -SIMDJSON_NO_SANITIZE_MEMORY -simdjson_inline int trailing_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - return (int)_tzcnt_u64(input_num); -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - //////// - // You might expect the next line to be equivalent to - // return (int)_tzcnt_u64(input_num); - // but the generated code differs and might be less efficient? - //////// - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest } -/* result might be undefined when input_num is zero */ -simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return _blsr_u64(input_num); +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; } -/* result might be undefined when input_num is zero */ -simdjson_inline int leading_zeroes(uint64_t input_num) { - return int(_lzcnt_u64(input_num)); -} +} // namespace numberparsing +} // namespace icelake +} // namespace simdjson -#if SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows - return __popcnt64(input_num);// Visual Studio wants two underscores -} -#else -simdjson_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); -} -#endif +#define SIMDJSON_SWAR_NUMBER_PARSING 1 -simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); -#else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); +#endif // SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +/* end file simdjson/icelake/numberparsing_defs.h */ +/* end file simdjson/icelake/begin.h */ +/* including simdjson/generic/amalgamated.h for icelake: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for icelake */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! #endif -} -} // unnamed namespace +/* including simdjson/generic/base.h for icelake: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for icelake */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + } // namespace icelake } // namespace simdjson -#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H -/* end file simdjson/icelake/bitmanipulation.h */ -/* including simdjson/icelake/bitmask.h: #include "simdjson/icelake/bitmask.h" */ -/* begin file simdjson/icelake/bitmask.h */ -#ifndef SIMDJSON_ICELAKE_BITMASK_H -#define SIMDJSON_ICELAKE_BITMASK_H +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for icelake */ +/* including simdjson/generic/jsoncharutils.h for icelake: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for icelake */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register // -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid // -// For example, prefix_xor(00100100) == 00011100 +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. // -simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { - // There should be no such thing with a processor supporting avx2 - // but not clmul. - __m128i all_ones = _mm_set1_epi8('\xFF'); - __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); - return _mm_cvtsi128_si64(result); +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; } +#endif +} // namespace jsoncharutils } // unnamed namespace } // namespace icelake } // namespace simdjson -#endif // SIMDJSON_ICELAKE_BITMASK_H -/* end file simdjson/icelake/bitmask.h */ -/* including simdjson/icelake/simd.h: #include "simdjson/icelake/simd.h" */ -/* begin file simdjson/icelake/simd.h */ -#ifndef SIMDJSON_ICELAKE_SIMD_H -#define SIMDJSON_ICELAKE_SIMD_H +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for icelake */ +/* including simdjson/generic/atomparsing.h for icelake: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for icelake */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#if defined(__GNUC__) && !defined(__clang__) -#if __GNUC__ == 8 -#define SIMDJSON_GCC8 1 -#endif // __GNUC__ == 8 -#endif // defined(__GNUC__) && !defined(__clang__) - -#if SIMDJSON_GCC8 -/** - * GCC 8 fails to provide _mm512_set_epi8. We roll our own. - */ -inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { - return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), - uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), - uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), - uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), - uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), - uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), - uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), - uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); -} -#endif // SIMDJSON_GCC8 - - +#include namespace simdjson { namespace icelake { namespace { -namespace simd { +/// @private +namespace atomparsing { - // Forward-declared so they can be used by splat and friends. - template - struct base { - __m512i value; +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } - // Zero constructor - simdjson_inline base() : value{__m512i()} {} - // Conversion from SIMD register - simdjson_inline base(const __m512i _value) : value(_value) {} +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} - // Conversion to SIMD register - simdjson_inline operator const __m512i&() const { return this->value; } - simdjson_inline operator __m512i&() { return this->value; } +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} - // Bit operations - simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } - simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } - simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } - simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } - simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } - }; +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} - // Forward-declared so they can be used by splat and friends. - template - struct simd8; +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} - template> - struct base8: base> { - typedef uint32_t bitmask_t; - typedef uint64_t bitmask2_t; +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} - simdjson_inline base8() : base>() {} - simdjson_inline base8(const __m512i _value) : base>(_value) {} +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} - friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { - return _mm512_cmpeq_epi8_mask(lhs, rhs); - } +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} - static const int SIZE = sizeof(base::value); +} // namespace atomparsing +} // unnamed namespace +} // namespace icelake +} // namespace simdjson - template - simdjson_inline simd8 prev(const simd8 prev_chunk) const { - // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) - constexpr int shift = 16 - N; - return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); - } - }; +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for icelake */ +/* including simdjson/generic/dom_parser_implementation.h for icelake: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for icelake */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H - // SIMD byte mask type (returned by things like eq and gt) - template<> - struct simd8: base8 { - static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m512i _value) : base8(_value) {} - // Splat constructor - simdjson_inline simd8(bool _value) : base8(splat(_value)) {} - simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } - simdjson_inline simd8 operator~() const { return *this ^ true; } - }; +namespace simdjson { +namespace icelake { - template - struct base8_numeric: base8 { - static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } - static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } - static simdjson_inline simd8 load(const T values[64]) { - return _mm512_loadu_si512(reinterpret_cast(values)); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_inline simd8 repeat_16( - T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container - simdjson_inline base8_numeric() : base8() {} - simdjson_inline base8_numeric(const __m512i _value) : base8(_value) {} +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); - // Store to array - simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; - // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } - simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; - // Override to distinguish from bool version - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) - template - simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm512_shuffle_epi8(lookup_table, *this); - } +}; - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). - // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes - // get written. - // Design consideration: it seems like a function with the - // signature simd8 compress(uint32_t mask) would be - // sensible, but the AVX ISA makes this kind of approach difficult. - template - simdjson_inline void compress(uint64_t mask, L * output) const { - _mm512_mask_compressstoreu_epi8 (output,~mask,*this); - } +} // namespace icelake +} // namespace simdjson - template - simdjson_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, replace15 - )); - } - }; +namespace simdjson { +namespace icelake { - // Signed bytes - template<> - struct simd8 : base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, - int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, - int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, - int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, - int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, - int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, - int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 - ) : simd8(_mm512_set_epi8( - v63, v62, v61, v60, v59, v58, v57, v56, - v55, v54, v53, v52, v51, v50, v49, v48, - v47, v46, v45, v44, v43, v42, v41, v40, - v39, v38, v37, v36, v35, v34, v33, v32, - v31, v30, v29, v28, v27, v26, v25, v24, - v23, v22, v21, v20, v19, v18, v17, v16, - v15, v14, v13, v12, v11, v10, v9, v8, - v7, v6, v5, v4, v3, v2, v1, v0 - )) {} - - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } - - // Order-sensitive comparisons - simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } - - simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } - simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } - }; - - // Unsigned bytes - template<> - struct simd8: base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, - uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, - uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, - uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, - uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, - uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, - uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 - ) : simd8(_mm512_set_epi8( - v63, v62, v61, v60, v59, v58, v57, v56, - v55, v54, v53, v52, v51, v50, v49, v48, - v47, v46, v45, v44, v43, v42, v41, v40, - v39, v38, v37, v36, v35, v34, v33, v32, - v31, v30, v29, v28, v27, v26, v25, v24, - v23, v22, v21, v20, v19, v18, v17, v16, - v15, v14, v13, v12, v11, v10, v9, v8, - v7, v6, v5, v4, v3, v2, v1, v0 - )) {} - - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } - - // Saturated math - simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } - simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } - - // Order-specific operations - simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } - - // Bit-specific operations - simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } - simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - - simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } - simdjson_inline bool bits_not_set_anywhere() const { - return !_mm512_test_epi8_mask(*this, *this); - } - simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } - simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } - template - simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } - template - simdjson_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } - // Get one of the bits and make a bitmask out of it. - // e.g. value.get_bit<7>() gets the high bit - template - simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } - }; - - template - struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64& o) = delete; // no copy allowed - simd8x64& operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} - simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} - simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} - - simdjson_inline uint64_t compress(uint64_t mask, T * output) const { - this->chunks[0].compress(mask, output); - return 64 - count_ones(mask); - } - - simdjson_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr+sizeof(simd8)*0); - } - - simdjson_inline simd8 reduce_or() const { - return this->chunks[0]; - } - - simdjson_inline simd8x64 bit_or(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] | mask - ); - } +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; - simdjson_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return this->chunks[0] == mask; - } +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; - simdjson_inline uint64_t eq(const simd8x64 &other) const { - return this->chunks[0] == other.chunks[0]; - } + _capacity = capacity; + return SUCCESS; +} - simdjson_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return this->chunks[0] <= mask; - } - }; // struct simd8x64 +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } -} // namespace simd + _max_depth = max_depth; + return SUCCESS; +} -} // unnamed namespace } // namespace icelake } // namespace simdjson -#endif // SIMDJSON_ICELAKE_SIMD_H -/* end file simdjson/icelake/simd.h */ -/* including simdjson/icelake/stringparsing_defs.h: #include "simdjson/icelake/stringparsing_defs.h" */ -/* begin file simdjson/icelake/stringparsing_defs.h */ -#ifndef SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H -#define SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for icelake */ +/* including simdjson/generic/implementation_simdjson_result_base.h for icelake: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for icelake */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/simd.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { -namespace { - -using namespace simd; - -// Holds backslashes and quotes locations. -struct backslash_and_quote { -public: - static constexpr uint32_t BYTES_PROCESSED = 64; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - - simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } - simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } - uint64_t bs_bits; - uint64_t quote_bits; -}; // struct backslash_and_quote +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { -simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // this can read up to 15 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); - simd8 v(src); - // store to dest unconditionally - we can overwrite the bits we don't like later - v.store(dst); - return { - static_cast(v == '\\'), // bs_bits - static_cast(v == '"'), // quote_bits - }; -} + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; -} // unnamed namespace -} // namespace icelake -} // namespace simdjson + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; -#endif // SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H -/* end file simdjson/icelake/stringparsing_defs.h */ -/* including simdjson/icelake/numberparsing_defs.h: #include "simdjson/icelake/numberparsing_defs.h" */ -/* begin file simdjson/icelake/numberparsing_defs.h */ -#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H -#define SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; -namespace simdjson { -namespace icelake { -namespace numberparsing { + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; -static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - // this actually computes *16* values so we are being wasteful. - const __m128i ascii0 = _mm_set1_epi8('0'); - const __m128i mul_1_10 = - _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); - const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); - const __m128i mul_1_10000 = - _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); - const __m128i input = _mm_sub_epi8( - _mm_loadu_si128(reinterpret_cast(chars)), ascii0); - const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); - const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); - const __m128i t3 = _mm_packus_epi32(t2, t2); - const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); - return _mm_cvtsi128_si32( - t4); // only captures the sum of the first 8 digits, drop the rest -} + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; -/** @private */ -simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { - internal::value128 answer; -#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); -#endif - return answer; -} + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; -} // namespace numberparsing -} // namespace icelake -} // namespace simdjson +#if SIMDJSON_EXCEPTIONS -#define SIMDJSON_SWAR_NUMBER_PARSING 1 + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); -#endif // SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H -/* end file simdjson/icelake/numberparsing_defs.h */ -/* end file simdjson/icelake/begin.h */ -/* including simdjson/generic/amalgamated.h for icelake: #include "simdjson/generic/amalgamated.h" */ -/* begin file simdjson/generic/amalgamated.h for icelake */ -#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) -#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! -#endif + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); -/* including simdjson/generic/base.h for icelake: #include "simdjson/generic/base.h" */ -/* begin file simdjson/generic/base.h for icelake */ -#ifndef SIMDJSON_GENERIC_BASE_H + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ -/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ -/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ -/* amalgamation skipped (editor-only): #else */ -/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ -/* amalgamation skipped (editor-only): #endif */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); -namespace simdjson { -namespace icelake { -struct open_container; -class dom_parser_implementation; +#endif // SIMDJSON_EXCEPTIONS -/** - * The type of a JSON number - */ -enum class number_type { - floating_point_number=1, /// a binary64 number - signed_integer, /// a signed integer that fits in a 64-bit word using two's complement - unsigned_integer /// a positive integer larger or equal to 1<<63 -}; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base } // namespace icelake } // namespace simdjson -#endif // SIMDJSON_GENERIC_BASE_H -/* end file simdjson/generic/base.h for icelake */ -/* including simdjson/generic/jsoncharutils.h for icelake: #include "simdjson/generic/jsoncharutils.h" */ -/* begin file simdjson/generic/jsoncharutils.h for icelake */ -#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for icelake */ +/* including simdjson/generic/numberparsing.h for icelake: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for icelake */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include +#include +#include + namespace simdjson { namespace icelake { -namespace { -namespace jsoncharutils { +namespace numberparsing { -// return non-zero if not a structural or whitespace char -// zero otherwise -simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace_negated[c]; -} +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif -simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace[c]; -} +namespace { -// returns a value with the high 16 bits set if not valid -// otherwise returns the conversion of the 4 hex digits at src into the bottom -// 16 bits of the 32-bit return register -// -// see -// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ -static inline uint32_t hex_to_u32_nocheck( - const uint8_t *src) { // strictly speaking, static inline is a C-ism - uint32_t v1 = internal::digit_to_val32[630 + src[0]]; - uint32_t v2 = internal::digit_to_val32[420 + src[1]]; - uint32_t v3 = internal::digit_to_val32[210 + src[2]]; - uint32_t v4 = internal::digit_to_val32[0 + src[3]]; - return v1 | v2 | v3 | v4; -} - -// given a code point cp, writes to c -// the utf-8 code, outputting the length in -// bytes, if the length is zero, the code point -// is invalid -// -// This can possibly be made faster using pdep -// and clz and table lookups, but JSON documents -// have few escaped code points, and the following -// function looks cheap. -// -// Note: we assume that surrogates are treated separately -// -simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { - if (cp <= 0x7F) { - c[0] = uint8_t(cp); - return 1; // ascii - } - if (cp <= 0x7FF) { - c[0] = uint8_t((cp >> 6) + 192); - c[1] = uint8_t((cp & 63) + 128); - return 2; // universal plane - // Surrogates are treated elsewhere... - //} //else if (0xd800 <= cp && cp <= 0xdfff) { - // return 0; // surrogates // could put assert here - } else if (cp <= 0xFFFF) { - c[0] = uint8_t((cp >> 12) + 224); - c[1] = uint8_t(((cp >> 6) & 63) + 128); - c[2] = uint8_t((cp & 63) + 128); - return 3; - } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed - c[0] = uint8_t((cp >> 18) + 240); - c[1] = uint8_t(((cp >> 12) & 63) + 128); - c[2] = uint8_t(((cp >> 6) & 63) + 128); - c[3] = uint8_t((cp & 63) + 128); - return 4; - } - // will return 0 when the code point was too large. - return 0; // bad r -} - -#if SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; -} -#endif - -} // namespace jsoncharutils -} // unnamed namespace -} // namespace icelake -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H -/* end file simdjson/generic/jsoncharutils.h for icelake */ -/* including simdjson/generic/atomparsing.h for icelake: #include "simdjson/generic/atomparsing.h" */ -/* begin file simdjson/generic/atomparsing.h for icelake */ -#ifndef SIMDJSON_GENERIC_ATOMPARSING_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -#include - -namespace simdjson { -namespace icelake { -namespace { -/// @private -namespace atomparsing { - -// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. -// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot -// be certain that the character pointer will be properly aligned. -// You might think that using memcpy makes this function expensive, but you'd be wrong. -// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); -// to the compile-time constant 1936482662. -simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } - - -// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. -// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. -simdjson_warn_unused -simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { - uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) - static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); - std::memcpy(&srcval, src, sizeof(uint32_t)); - return srcval ^ string_to_uint32(atom); -} - -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src) { - return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} - -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_true_atom(src); } - else if (len == 4) { return !str4ncmp(src, "true"); } - else { return false; } -} - -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src) { - return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; -} - -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { - if (len > 5) { return is_valid_false_atom(src); } - else if (len == 5) { return !str4ncmp(src+1, "alse"); } - else { return false; } -} - -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src) { - return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} - -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_null_atom(src); } - else if (len == 4) { return !str4ncmp(src, "null"); } - else { return false; } -} - -} // namespace atomparsing -} // unnamed namespace -} // namespace icelake -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ATOMPARSING_H -/* end file simdjson/generic/atomparsing.h for icelake */ -/* including simdjson/generic/dom_parser_implementation.h for icelake: #include "simdjson/generic/dom_parser_implementation.h" */ -/* begin file simdjson/generic/dom_parser_implementation.h for icelake */ -#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace icelake { - -// expectation: sizeof(open_container) = 64/8. -struct open_container { - uint32_t tape_index; // where, on the tape, does the scope ([,{) begins - uint32_t count; // how many elements in the scope -}; // struct open_container - -static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); - -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - /** Tape location of each open { or [ */ - std::unique_ptr open_containers{}; - /** Whether each open container is a [ or { */ - std::unique_ptr is_array{}; - /** Buffer passed to stage 1 */ - const uint8_t *buf{}; - /** Length passed to stage 1 */ - size_t len{0}; - /** Document passed to stage 2 */ - dom::document *doc{}; - - inline dom_parser_implementation() noexcept; - inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; - inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; - - simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; - simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; - simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; - simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; - inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; - inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; -private: - simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); - -}; - -} // namespace icelake -} // namespace simdjson - -namespace simdjson { -namespace icelake { - -inline dom_parser_implementation::dom_parser_implementation() noexcept = default; -inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; -inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; - -// Leaving these here so they can be inlined if so desired -inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } - // Stage 1 index output - size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; - structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); - if (!structural_indexes) { _capacity = 0; return MEMALLOC; } - structural_indexes[0] = 0; - n_structural_indexes = 0; - - _capacity = capacity; - return SUCCESS; -} - -inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - // Stage 2 stacks - open_containers.reset(new (std::nothrow) open_container[max_depth]); - is_array.reset(new (std::nothrow) bool[max_depth]); - if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } - - _max_depth = max_depth; - return SUCCESS; -} - -} // namespace icelake -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H -/* end file simdjson/generic/dom_parser_implementation.h for icelake */ -/* including simdjson/generic/implementation_simdjson_result_base.h for icelake: #include "simdjson/generic/implementation_simdjson_result_base.h" */ -/* begin file simdjson/generic/implementation_simdjson_result_base.h for icelake */ -#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace icelake { - -// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair -// so we can avoid inlining errors -// TODO reconcile these! -/** - * The result of a simdjson operation that could fail. - * - * Gives the option of reading error codes, or throwing an exception by casting to the desired result. - * - * This is a base class for implementations that want to add functions to the result type for - * chaining. - * - * Override like: - * - * struct simdjson_result : public internal::implementation_simdjson_result_base { - * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} - * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} - * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} - * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} - * // Your extra methods here - * } - * - * Then any method returning simdjson_result will be chainable with your methods. - */ -template -struct implementation_simdjson_result_base { - - /** - * Create a new empty result with error = UNINITIALIZED. - */ - simdjson_inline implementation_simdjson_result_base() noexcept = default; - - /** - * Create a new error result. - */ - simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; - - /** - * Create a new successful result. - */ - simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; - - /** - * Create a new result with both things (use if you don't want to branch when creating the result). - */ - simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; - - /** - * Move the value and the error to the provided variables. - * - * @param value The variable to assign the value to. May not be set if there is an error. - * @param error The variable to assign the error to. Set to SUCCESS if there is no error. - */ - simdjson_inline void tie(T &value, error_code &error) && noexcept; - - /** - * Move the value to the provided variable. - * - * @param value The variable to assign the value to. May not be set if there is an error. - */ - simdjson_inline error_code get(T &value) && noexcept; - - /** - * The error. - */ - simdjson_inline error_code error() const noexcept; - -#if SIMDJSON_EXCEPTIONS - - /** - * Get the result value. - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T& value() & noexcept(false); - - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T&& value() && noexcept(false); - - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T&& take_value() && noexcept(false); - - /** - * Cast to the value (will throw on error). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline operator T&&() && noexcept(false); - - -#endif // SIMDJSON_EXCEPTIONS - - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline const T& value_unsafe() const& noexcept; - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline T& value_unsafe() & noexcept; - /** - * Take the result value (move it). This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline T&& value_unsafe() && noexcept; -protected: - /** users should never directly access first and second. **/ - T first{}; /** Users should never directly access 'first'. **/ - error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ -}; // struct implementation_simdjson_result_base - -} // namespace icelake -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H -/* end file simdjson/generic/implementation_simdjson_result_base.h for icelake */ -/* including simdjson/generic/numberparsing.h for icelake: #include "simdjson/generic/numberparsing.h" */ -/* begin file simdjson/generic/numberparsing.h for icelake */ -#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -#include -#include -#include - -namespace simdjson { -namespace icelake { -namespace numberparsing { - -#ifdef JSON_TEST_NUMBERS -#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) -#else -#define INVALID_NUMBER(SRC) (NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) -#endif - -namespace { - -// Convert a mantissa, an exponent and a sign bit into an ieee64 double. -// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). -// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. -simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { - double d; - mantissa &= ~(1ULL << 52); - mantissa |= real_exponent << 52; - mantissa |= ((static_cast(negative)) << 63); - std::memcpy(&d, &mantissa, sizeof(d)); - return d; +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; } // Attempts to compute i * 10^(power) exactly; and if "negative" is @@ -19002,7 +18935,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -19222,6 +19155,10 @@ simdjson_inline bool parse_digit(const uint8_t c, I &i) { return true; } +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer @@ -19279,7 +19216,7 @@ simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const s // If there were more than 18 digits, we may have overflowed the integer. We have to do // something!!!! if (simdjson_unlikely(p > start_exp+18)) { - // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow while (*start_exp == '0') { start_exp++; } // 19 digits could overflow int64_t and is kind of absurd anyway. We don't // support exponents smaller than -999,999,999,999,999,999 and bigger @@ -19301,6 +19238,23 @@ simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const s return SUCCESS; } +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. @@ -19375,6 +19329,18 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, return SUCCESS; } +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING @@ -19406,7 +19372,6 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // Our objective is accurate parsing (ULP of 0) at high speed. template simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { - // // Check for minus sign // @@ -19453,11 +19418,11 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. size_t longest_digit_count = negative ? 19 : 20; - if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } if (digit_count == longest_digit_count) { if (negative) { // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } WRITE_INTEGER(~i+1, src, writer); if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -19476,7 +19441,7 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } } - // Write unsigned if it doesn't fit in a signed integer. + // Write unsigned if it does not fit in a signed integer. if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { @@ -19927,19 +19892,32 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); if ( p == src ) { return NUMBER_ERROR; } if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } // If the number is negative and valid, it must be a signed integer. - if(negative) { return number_type::signed_integer; } + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). + static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). // We want values larger or equal to 9223372036854775808 to be unsigned // integers, and the other values to be signed integers. - int digit_count = int(p - src); - if(digit_count >= 19) { - const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); - if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { - return number_type::unsigned_integer; - } + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; } return number_type::signed_integer; } @@ -20117,6 +20095,7 @@ inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; default: SIMDJSON_UNREACHABLE(); } return out; @@ -20478,13 +20457,13 @@ static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { internal::value128 answer; #if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 +#if SIMDJSON_IS_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); answer.low = value1 * value2; #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 +#endif // SIMDJSON_IS_ARM64 #else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); @@ -20497,7 +20476,13 @@ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t } // namespace ppc64 } // namespace simdjson +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 +#else #define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif +#endif #endif // SIMDJSON_PPC64_NUMBERPARSING_DEFS_H /* end file simdjson/ppc64/numberparsing_defs.h */ @@ -20601,11 +20586,11 @@ template <> struct simd8 : base8 { return (__m128i)vec_splats((unsigned char)(-(!!_value))); } - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m128i _value) + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} // Splat constructor - simdjson_inline simd8(bool _value) + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} simdjson_inline int to_bitmask() const { @@ -21074,6 +21059,10 @@ backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { /* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ /* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ /* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ /* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ /* amalgamation skipped (editor-only): #else */ @@ -21094,7 +21083,8 @@ class dom_parser_implementation; enum class number_type { floating_point_number=1, /// a binary64 number signed_integer, /// a signed integer that fits in a 64-bit word using two's complement - unsigned_integer /// a positive integer larger or equal to 1<<63 + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word }; } // namespace ppc64 @@ -21542,11 +21532,13 @@ namespace numberparsing { #define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) #else #define INVALID_NUMBER(SRC) (NUMBER_ERROR) #define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) #define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) #define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) #endif namespace { @@ -21657,7 +21649,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -21877,6 +21869,10 @@ simdjson_inline bool parse_digit(const uint8_t c, I &i) { return true; } +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer @@ -21934,7 +21930,7 @@ simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const s // If there were more than 18 digits, we may have overflowed the integer. We have to do // something!!!! if (simdjson_unlikely(p > start_exp+18)) { - // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow while (*start_exp == '0') { start_exp++; } // 19 digits could overflow int64_t and is kind of absurd anyway. We don't // support exponents smaller than -999,999,999,999,999,999 and bigger @@ -21956,6 +21952,23 @@ simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const s return SUCCESS; } +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. @@ -22030,6 +22043,18 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, return SUCCESS; } +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING @@ -22061,7 +22086,6 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // Our objective is accurate parsing (ULP of 0) at high speed. template simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { - // // Check for minus sign // @@ -22108,11 +22132,11 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // The longest positive 64-bit number is 20 digits. // We do it this way so we don't trigger this branch unless we must. size_t longest_digit_count = negative ? 19 : 20; - if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } if (digit_count == longest_digit_count) { if (negative) { // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } WRITE_INTEGER(~i+1, src, writer); if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -22131,7 +22155,7 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } } - // Write unsigned if it doesn't fit in a signed integer. + // Write unsigned if it does not fit in a signed integer. if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { @@ -22209,3727 +22233,31546 @@ const uint8_t integer_string_finisher[256] = { NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR}; -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). + static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for ppc64 */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64 */ +/* end file simdjson/generic/amalgamated.h for ppc64 */ +/* including simdjson/ppc64/end.h: #include "simdjson/ppc64/end.h" */ +/* begin file simdjson/ppc64/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "ppc64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/ppc64/end.h */ + +#endif // SIMDJSON_PPC64_H +/* end file simdjson/ppc64.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) +/* including simdjson/westmere.h: #include "simdjson/westmere.h" */ +/* begin file simdjson/westmere.h */ +#ifndef SIMDJSON_WESTMERE_H +#define SIMDJSON_WESTMERE_H + +/* including simdjson/westmere/begin.h: #include "simdjson/westmere/begin.h" */ +/* begin file simdjson/westmere/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "westmere" */ +#define SIMDJSON_IMPLEMENTATION westmere +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). + */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_TARGET_REGION("sse4.2,pclmul,popcnt") +#endif + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/bitmask.h: #include "simdjson/westmere/bitmask.h" */ +/* begin file simdjson/westmere/bitmask.h */ +#ifndef SIMDJSON_WESTMERE_BITMASK_H +#define SIMDJSON_WESTMERE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processing supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMASK_H +/* end file simdjson/westmere/bitmask.h */ +/* including simdjson/westmere/numberparsing_defs.h: #include "simdjson/westmere/numberparsing_defs.h" */ +/* begin file simdjson/westmere/numberparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +#define SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H + +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). + */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace westmere +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +/* end file simdjson/westmere/numberparsing_defs.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ +/* including simdjson/westmere/stringparsing_defs.h: #include "simdjson/westmere/stringparsing_defs.h" */ +/* begin file simdjson/westmere/stringparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +#define SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ + +namespace simdjson { +namespace westmere { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + 16); + v0.store(dst); + v1.store(dst + 16); + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +/* end file simdjson/westmere/stringparsing_defs.h */ +/* end file simdjson/westmere/begin.h */ +/* including simdjson/generic/amalgamated.h for westmere: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for westmere */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for westmere: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for westmere */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for westmere */ +/* including simdjson/generic/jsoncharutils.h for westmere: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for westmere */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for westmere */ +/* including simdjson/generic/atomparsing.h for westmere: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for westmere */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace westmere { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for westmere */ +/* including simdjson/generic/dom_parser_implementation.h for westmere: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for westmere */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace westmere +} // namespace simdjson + +namespace simdjson { +namespace westmere { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for westmere */ +/* including simdjson/generic/implementation_simdjson_result_base.h for westmere: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for westmere */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for westmere */ +/* including simdjson/generic/numberparsing.h for westmere: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for westmere */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace westmere { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximately equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). + static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for westmere */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for westmere: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for westmere */ +/* end file simdjson/generic/amalgamated.h for westmere */ +/* including simdjson/westmere/end.h: #include "simdjson/westmere/end.h" */ +/* begin file simdjson/westmere/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "westmere" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/westmere/end.h */ + +#endif // SIMDJSON_WESTMERE_H +/* end file simdjson/westmere.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(lsx) +/* including simdjson/lsx.h: #include "simdjson/lsx.h" */ +/* begin file simdjson/lsx.h */ +#ifndef SIMDJSON_LSX_H +#define SIMDJSON_LSX_H + +/* including simdjson/lsx/begin.h: #include "simdjson/lsx/begin.h" */ +/* begin file simdjson/lsx/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "lsx" */ +#define SIMDJSON_IMPLEMENTATION lsx +/* including simdjson/lsx/base.h: #include "simdjson/lsx/base.h" */ +/* begin file simdjson/lsx/base.h */ +#ifndef SIMDJSON_LSX_BASE_H +#define SIMDJSON_LSX_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for LSX. + */ +namespace lsx { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_BASE_H +/* end file simdjson/lsx/base.h */ +/* including simdjson/lsx/intrinsics.h: #include "simdjson/lsx/intrinsics.h" */ +/* begin file simdjson/lsx/intrinsics.h */ +#ifndef SIMDJSON_LSX_INTRINSICS_H +#define SIMDJSON_LSX_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for LoongArch SX"); + +#endif // SIMDJSON_LSX_INTRINSICS_H +/* end file simdjson/lsx/intrinsics.h */ +/* including simdjson/lsx/bitmanipulation.h: #include "simdjson/lsx/bitmanipulation.h" */ +/* begin file simdjson/lsx/bitmanipulation.h */ +#ifndef SIMDJSON_LSX_BITMANIPULATION_H +#define SIMDJSON_LSX_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmask.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return __builtin_clzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__m128i(v2u64{input_num, 0})), 0); +} + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_BITMANIPULATION_H +/* end file simdjson/lsx/bitmanipulation.h */ +/* including simdjson/lsx/bitmask.h: #include "simdjson/lsx/bitmask.h" */ +/* begin file simdjson/lsx/bitmask.h */ +#ifndef SIMDJSON_LSX_BITMASK_H +#define SIMDJSON_LSX_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif +/* end file simdjson/lsx/bitmask.h */ +/* including simdjson/lsx/numberparsing_defs.h: #include "simdjson/lsx/numberparsing_defs.h" */ +/* begin file simdjson/lsx/numberparsing_defs.h */ +#ifndef SIMDJSON_LSX_NUMBERPARSING_DEFS_H +#define SIMDJSON_LSX_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lsx { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); + return answer; +} + +} // namespace numberparsing +} // namespace lsx +} // namespace simdjson + +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 +#else +#define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif +#endif + +#endif // SIMDJSON_LSX_NUMBERPARSING_DEFS_H +/* end file simdjson/lsx/numberparsing_defs.h */ +/* including simdjson/lsx/simd.h: #include "simdjson/lsx/simd.h" */ +/* begin file simdjson/lsx/simd.h */ +#ifndef SIMDJSON_LSX_SIMD_H +#define SIMDJSON_LSX_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + simdjson_inline operator const v16i8&() const { return (v16i8&)this->value; } + simdjson_inline operator v16i8&() { return (v16i8&)this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return __lsx_vor_v(*this, other); } + simdjson_inline Child operator&(const Child other) const { return __lsx_vand_v(*this, other); } + simdjson_inline Child operator^(const Child other) const { return __lsx_vxor_v(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return __lsx_vandn_v(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return __lsx_vseq_b(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(*this, N), __lsx_vbsrl_v(prev_chunk, 16 - N)); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { + return __lsx_vreplgr2vr_b(uint8_t(-(!!_value))); + } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return __lsx_vpickve2gr_w(__lsx_vmskltz_b(*this), 0); } + simdjson_inline bool any() const { return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return __lsx_vreplgr2vr_b(_value); } + static simdjson_inline simd8 zero() { return __lsx_vldi(0); } + static simdjson_inline simd8 load(const T values[16]) { + return __lsx_vld(reinterpret_cast(values), 0); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { + return __lsx_vst(*this, reinterpret_cast<__m128i *>(dst), 0); + } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return __lsx_vadd_b(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return __lsx_vsub_b(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return __lsx_vshuf_b(lookup_table, lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by haswell + // lsx do it in 2 steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register. + __m128i shufmask = {int64_t(thintable_epi8[mask1]), int64_t(thintable_epi8[mask2]) + 0x0808080808080808}; + // this is the version "nearly pruned" + __m128i pruned = __lsx_vshuf_b(*this, *this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask + __m128i compactmask = __lsx_vldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop1 * 8); + __m128i answer = __lsx_vshuf_b(pruned, pruned, compactmask); + __lsx_vst(answer, reinterpret_cast(output), 0); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[16]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8({ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return __lsx_vmax_b(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lsx_vmin_b(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return __lsx_vslt_b(other, *this); } + simdjson_inline simd8 operator<(const simd8 other) const { return __lsx_vslt_b(*this, other); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(__m128i(v16u8{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + })) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return __lsx_vsadd_bu(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return __lsx_vssub_bu(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return __lsx_vmax_bu(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lsx_vmin_bu(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return 0 == __lsx_vpickve2gr_w(__lsx_vmskltz_b(*this), 0); } + simdjson_inline bool bits_not_set_anywhere() const { return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(__lsx_vand_v(*this, bits)), 0); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(__lsx_vsrli_b(*this, N)); } + template + simdjson_inline simd8 shl() const { return simd8(__lsx_vslli_b(*this, N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "LSX kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint16_t mask1 = uint16_t(mask); + uint16_t mask2 = uint16_t(mask >> 16); + uint16_t mask3 = uint16_t(mask >> 32); + uint16_t mask4 = uint16_t(mask >> 48); + __m128i zcnt = __lsx_vpcnt_h(__m128i(v2u64{~mask, 0})); + uint64_t zcnt1 = __lsx_vpickve2gr_hu(zcnt, 0); + uint64_t zcnt2 = __lsx_vpickve2gr_hu(zcnt, 1); + uint64_t zcnt3 = __lsx_vpickve2gr_hu(zcnt, 2); + uint64_t zcnt4 = __lsx_vpickve2gr_hu(zcnt, 3); + uint8_t *voutput = reinterpret_cast(output); + // There should be a critical value which processes in scaler is faster. + if (zcnt1) + this->chunks[0].compress(mask1, reinterpret_cast(voutput)); + voutput += zcnt1; + if (zcnt2) + this->chunks[1].compress(mask2, reinterpret_cast(voutput)); + voutput += zcnt2; + if (zcnt3) + this->chunks[2].compress(mask3, reinterpret_cast(voutput)); + voutput += zcnt3; + if (zcnt4) + this->chunks[3].compress(mask4, reinterpret_cast(voutput)); + voutput += zcnt4; + return reinterpret_cast(voutput) - reinterpret_cast(output); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline uint64_t to_bitmask() const { + __m128i mask1 = __lsx_vmskltz_b(this->chunks[0]); + __m128i mask2 = __lsx_vmskltz_b(this->chunks[1]); + __m128i mask3 = __lsx_vmskltz_b(this->chunks[2]); + __m128i mask4 = __lsx_vmskltz_b(this->chunks[3]); + mask1 = __lsx_vilvl_h(mask2, mask1); + mask2 = __lsx_vilvl_h(mask4, mask3); + return __lsx_vpickve2gr_du(__lsx_vilvl_w(mask2, mask1), 0); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_SIMD_H +/* end file simdjson/lsx/simd.h */ +/* including simdjson/lsx/stringparsing_defs.h: #include "simdjson/lsx/stringparsing_defs.h" */ +/* begin file simdjson/lsx/stringparsing_defs.h */ +#ifndef SIMDJSON_LSX_STRINGPARSING_DEFS_H +#define SIMDJSON_LSX_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on LSX; therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_LSX_STRINGPARSING_DEFS_H +/* end file simdjson/lsx/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/lsx/begin.h */ +/* including simdjson/generic/amalgamated.h for lsx: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for lsx */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for lsx: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for lsx */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for lsx */ +/* including simdjson/generic/jsoncharutils.h for lsx: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for lsx */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for lsx */ +/* including simdjson/generic/atomparsing.h for lsx: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for lsx */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lsx { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for lsx */ +/* including simdjson/generic/dom_parser_implementation.h for lsx: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for lsx */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace lsx +} // namespace simdjson + +namespace simdjson { +namespace lsx { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for lsx */ +/* including simdjson/generic/implementation_simdjson_result_base.h for lsx: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for lsx */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for lsx */ +/* including simdjson/generic/numberparsing.h for lsx: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for lsx */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace lsx { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximately equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). + static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for lsx */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for lsx: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for lsx */ +/* end file simdjson/generic/amalgamated.h for lsx */ +/* including simdjson/lsx/end.h: #include "simdjson/lsx/end.h" */ +/* begin file simdjson/lsx/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "lsx" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/lsx/end.h */ + +#endif // SIMDJSON_LSX_H +/* end file simdjson/lsx.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(lasx) +/* including simdjson/lasx.h: #include "simdjson/lasx.h" */ +/* begin file simdjson/lasx.h */ +#ifndef SIMDJSON_LASX_H +#define SIMDJSON_LASX_H + +/* including simdjson/lasx/begin.h: #include "simdjson/lasx/begin.h" */ +/* begin file simdjson/lasx/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "lasx" */ +#define SIMDJSON_IMPLEMENTATION lasx +/* including simdjson/lasx/base.h: #include "simdjson/lasx/base.h" */ +/* begin file simdjson/lasx/base.h */ +#ifndef SIMDJSON_LASX_BASE_H +#define SIMDJSON_LASX_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for LASX. + */ +namespace lasx { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_BASE_H +/* end file simdjson/lasx/base.h */ +/* including simdjson/lasx/intrinsics.h: #include "simdjson/lasx/intrinsics.h" */ +/* begin file simdjson/lasx/intrinsics.h */ +#ifndef SIMDJSON_LASX_INTRINSICS_H +#define SIMDJSON_LASX_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for LoongArch ASX"); + +#endif // SIMDJSON_LASX_INTRINSICS_H +/* end file simdjson/lasx/intrinsics.h */ +/* including simdjson/lasx/bitmanipulation.h: #include "simdjson/lasx/bitmanipulation.h" */ +/* begin file simdjson/lasx/bitmanipulation.h */ +#ifndef SIMDJSON_LASX_BITMANIPULATION_H +#define SIMDJSON_LASX_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmask.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { + return __builtin_ctzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return __builtin_clzll(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return __lasx_xvpickve2gr_w(__lasx_xvpcnt_d(__m256i(v4u64{input_num, 0, 0, 0})), 0); +} + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_BITMANIPULATION_H +/* end file simdjson/lasx/bitmanipulation.h */ +/* including simdjson/lasx/bitmask.h: #include "simdjson/lasx/bitmask.h" */ +/* begin file simdjson/lasx/bitmask.h */ +#ifndef SIMDJSON_LASX_BITMASK_H +#define SIMDJSON_LASX_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif +/* end file simdjson/lasx/bitmask.h */ +/* including simdjson/lasx/numberparsing_defs.h: #include "simdjson/lasx/numberparsing_defs.h" */ +/* begin file simdjson/lasx/numberparsing_defs.h */ +#ifndef SIMDJSON_LASX_NUMBERPARSING_DEFS_H +#define SIMDJSON_LASX_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lasx { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); + return answer; +} + +} // namespace numberparsing +} // namespace lasx +} // namespace simdjson + +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 +#else +#define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif +#endif + +#endif // SIMDJSON_LASX_NUMBERPARSING_DEFS_H +/* end file simdjson/lasx/numberparsing_defs.h */ +/* including simdjson/lasx/simd.h: #include "simdjson/lasx/simd.h" */ +/* begin file simdjson/lasx/simd.h */ +#ifndef SIMDJSON_LASX_SIMD_H +#define SIMDJSON_LASX_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m256i value; + + // Zero constructor + simdjson_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + simdjson_inline operator const v32i8&() const { return (v32i8&)this->value; } + simdjson_inline operator v32i8&() { return (v32i8&)this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return __lasx_xvor_v(*this, other); } + simdjson_inline Child operator&(const Child other) const { return __lasx_xvand_v(*this, other); } + simdjson_inline Child operator^(const Child other) const { return __lasx_xvxor_v(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return __lasx_xvandn_v(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return __lasx_xvseq_b(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + __m256i hi = __lasx_xvbsll_v(*this, N); + __m256i lo = __lasx_xvbsrl_v(*this, 16 - N); + __m256i tmp = __lasx_xvbsrl_v(prev_chunk, 16 - N); + lo = __lasx_xvpermi_q(lo, tmp, 0x21); + return __lasx_xvor_v(hi, lo); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return __lasx_xvreplgr2vr_b(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { + __m256i mask = __lasx_xvmskltz_b(*this); + return (__lasx_xvpickve2gr_w(mask, 4) << 16) | (__lasx_xvpickve2gr_w(mask, 0)); + } + simdjson_inline bool any() const { + __m256i v = __lasx_xvmsknz_b(*this); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { + return __lasx_xvreplgr2vr_b(_value); + } + static simdjson_inline simd8 zero() { return __lasx_xvldi(0); } + static simdjson_inline simd8 load(const T values[32]) { + return __lasx_xvld(reinterpret_cast(values), 0); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[32]) const { + return __lasx_xvst(*this, reinterpret_cast<__m256i *>(dst), 0); + } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return __lasx_xvadd_b(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return __lasx_xvsub_b(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return __lasx_xvshuf_b(lookup_table, lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + template + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by haswell + // lasx do it in 4 steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask{1,2,3,4}] + // into a 256-bit register. + __m256i shufmask = {int64_t(thintable_epi8[mask1]), int64_t(thintable_epi8[mask2]) + 0x0808080808080808, int64_t(thintable_epi8[mask3]), int64_t(thintable_epi8[mask4]) + 0x0808080808080808}; + // this is the version "nearly pruned" + __m256i pruned = __lasx_xvshuf_b(*this, *this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop2 = BitsSetTable256mul2[mask2]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + __m256i masklo = __lasx_xvldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop1 * 8); + __m256i maskhi = __lasx_xvldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop3 * 8); + __m256i compactmask = __lasx_xvpermi_q(maskhi, masklo, 0x20); + __m256i answer = __lasx_xvshuf_b(pruned, pruned, compactmask); + __lasx_xvst(answer, reinterpret_cast(output), 0); + uint64_t value3 = __lasx_xvpickve2gr_du(answer, 2); + uint64_t value4 = __lasx_xvpickve2gr_du(answer, 3); + uint64_t *pos = reinterpret_cast(reinterpret_cast(output) + 16 - (pop1 + pop2) / 2); + pos[0] = value3; + pos[1] = value4; + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8({ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + }) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return __lasx_xvmax_b(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lasx_xvmin_b(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return __lasx_xvslt_b(other, *this); } + simdjson_inline simd8 operator<(const simd8 other) const { return __lasx_xvslt_b(*this, other); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(__m256i(v32u8{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + })) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return __lasx_xvsadd_bu(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return __lasx_xvssub_bu(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return __lasx_xvmax_bu(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lasx_xvmin_bu(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { + __m256i mask = __lasx_xvmskltz_b(*this); + return (0 == __lasx_xvpickve2gr_w(mask, 0)) && (0 == __lasx_xvpickve2gr_w(mask, 4)); + } + simdjson_inline bool bits_not_set_anywhere() const { + __m256i v = __lasx_xvmsknz_b(*this); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + __m256i v = __lasx_xvmsknz_b(__lasx_xvand_v(*this, bits)); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(__lasx_xvsrli_b(*this, N)); } + template + simdjson_inline simd8 shl() const { return simd8(__lasx_xvslli_b(*this, N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, "LASX kernel should use two registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + __m256i zcnt = __lasx_xvpcnt_w(__m256i(v4u64{~mask, 0, 0, 0})); + uint64_t zcnt1 = __lasx_xvpickve2gr_wu(zcnt, 0); + uint64_t zcnt2 = __lasx_xvpickve2gr_wu(zcnt, 1); + // There should be a critical value which processes in scaler is faster. + if (zcnt1) + this->chunks[0].compress(mask1, output); + if (zcnt2) + this->chunks[1].compress(mask2, output + zcnt1); + return zcnt1 + zcnt2; + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + } + + simdjson_inline uint64_t to_bitmask() const { + __m256i mask0 = __lasx_xvmskltz_b(this->chunks[0]); + __m256i mask1 = __lasx_xvmskltz_b(this->chunks[1]); + __m256i mask_tmp = __lasx_xvpickve_w(mask0, 4); + __m256i tmp = __lasx_xvpickve_w(mask1, 4); + mask0 = __lasx_xvinsve0_w(mask0, mask1, 1); + mask_tmp = __lasx_xvinsve0_w(mask_tmp, tmp, 1); + return __lasx_xvpickve2gr_du(__lasx_xvpackev_h(mask_tmp, mask0), 0); + } + + simdjson_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_SIMD_H +/* end file simdjson/lasx/simd.h */ +/* including simdjson/lasx/stringparsing_defs.h: #include "simdjson/lasx/stringparsing_defs.h" */ +/* begin file simdjson/lasx/stringparsing_defs.h */ +#ifndef SIMDJSON_LASX_STRINGPARSING_DEFS_H +#define SIMDJSON_LASX_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + return { + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits + }; +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_LASX_STRINGPARSING_DEFS_H +/* end file simdjson/lasx/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/lasx/begin.h */ +/* including simdjson/generic/amalgamated.h for lasx: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for lasx */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for lasx: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for lasx */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LSX */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_LASX */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer, /// a positive integer larger or equal to 1<<63 + big_integer /// a big integer that does not fit in a 64-bit word +}; + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for lasx */ +/* including simdjson/generic/jsoncharutils.h for lasx: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for lasx */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for lasx */ +/* including simdjson/generic/atomparsing.h for lasx: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for lasx */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lasx { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for lasx */ +/* including simdjson/generic/dom_parser_implementation.h for lasx: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for lasx */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace lasx +} // namespace simdjson + +namespace simdjson { +namespace lasx { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for lasx */ +/* including simdjson/generic/implementation_simdjson_result_base.h for lasx: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for lasx */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for lasx */ +/* including simdjson/generic/numberparsing.h for lasx: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for lasx */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace lasx { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#define BIGINT_NUMBER(SRC) (found_invalid_number((SRC)), BIGINT_ERROR) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#define BIGINT_NUMBER(SRC) (BIGINT_ERROR) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximately equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without + // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product + // is sufficiently accurate, and more computation is not needed. + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline bool is_digit(const uint8_t c) { + return static_cast(c - '0') <= 9; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and does not overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline bool check_if_integer(const uint8_t *const src, size_t max_length) { + const uint8_t *const srcend = src + max_length; + bool negative = (*src == '-'); // we can always read at least one character after the '-' + const uint8_t *p = src + uint8_t(negative); + if(p == srcend) { return false; } + if(*p == '0') { + ++p; + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; + } + while(p != srcend && is_digit(*p)) { ++p; } + if(p == srcend) { return true; } + if(jsoncharutils::is_not_structural_or_whitespace(*p)) { return false; } + return true; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { + if (parse_float_fallback(src, answer)) { + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer + // reference to it, it would force it to be stored in memory, preventing the compiler from + // picking it apart and putting into registers. i.e. if we pass it as reference, + // it gets slow. + double d; + error_code error = slow_float_parsing(src, &d); + writer.append_double(d); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return BIGINT_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return BIGINT_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it does not fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + size_t digit_count = size_t(p - src); + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + static const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + // We have an integer. + if(simdjson_unlikely(digit_count > 20)) { + return number_type::big_integer; + } + // If the number is negative and valid, it must be a signed integer. + if(negative) { + if (simdjson_unlikely(digit_count > 19)) return number_type::big_integer; + if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { + return number_type::big_integer; + } + return number_type::signed_integer; + } + // Let us check if we have a big integer (>=2**64). + static const uint8_t * two_to_sixtyfour = reinterpret_cast("18446744073709551616"); + if((digit_count > 20) || (digit_count == 20 && memcmp(src, two_to_sixtyfour, 20) >= 0)) { + return number_type::big_integer; + } + // The number is positive and smaller than 18446744073709551616 (or 2**64). + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + if((digit_count == 20) || (digit_count >= 19 && memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. + return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + case number_type::big_integer: out << "big integer"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for lasx */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for lasx: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for lasx */ +/* end file simdjson/generic/amalgamated.h for lasx */ +/* including simdjson/lasx/end.h: #include "simdjson/lasx/end.h" */ +/* begin file simdjson/lasx/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "lasx" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/lasx/end.h */ + +#endif // SIMDJSON_LASX_H +/* end file simdjson/lasx.h */ +#else +#error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION +#endif + +/* undefining SIMDJSON_CONDITIONAL_INCLUDE */ +#undef SIMDJSON_CONDITIONAL_INCLUDE + +#endif // SIMDJSON_BUILTIN_H +/* end file simdjson/builtin.h */ +/* skipped duplicate #include "simdjson/builtin/base.h" */ + +/* including simdjson/generic/ondemand/dependencies.h: #include "simdjson/generic/ondemand/dependencies.h" */ +/* begin file simdjson/generic/ondemand/dependencies.h */ +#ifdef SIMDJSON_CONDITIONAL_INCLUDE +#error simdjson/generic/ondemand/dependencies.h must be included before defining SIMDJSON_CONDITIONAL_INCLUDE! +#endif + +#ifndef SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H +#define SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H + +// Internal headers needed for ondemand generics. +// All includes not under simdjson/generic/ondemand must be here! +// Otherwise, amalgamation will fail. +/* skipped duplicate #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* skipped duplicate #include "simdjson/implementation.h" */ +/* skipped duplicate #include "simdjson/padded_string.h" */ +/* skipped duplicate #include "simdjson/padded_string_view.h" */ +/* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ +/* skipped duplicate #include "simdjson/jsonpathutil.h" */ + +#endif // SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H +/* end file simdjson/generic/ondemand/dependencies.h */ + +/* defining SIMDJSON_CONDITIONAL_INCLUDE */ +#define SIMDJSON_CONDITIONAL_INCLUDE + +#if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) +/* including simdjson/arm64/ondemand.h: #include "simdjson/arm64/ondemand.h" */ +/* begin file simdjson/arm64/ondemand.h */ +#ifndef SIMDJSON_ARM64_ONDEMAND_H +#define SIMDJSON_ARM64_ONDEMAND_H + +/* including simdjson/arm64/begin.h: #include "simdjson/arm64/begin.h" */ +/* begin file simdjson/arm64/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "arm64" */ +#define SIMDJSON_IMPLEMENTATION arm64 +/* including simdjson/arm64/base.h: #include "simdjson/arm64/base.h" */ +/* begin file simdjson/arm64/base.h */ +#ifndef SIMDJSON_ARM64_BASE_H +#define SIMDJSON_ARM64_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for NEON (ARMv8). + */ +namespace arm64 { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BASE_H +/* end file simdjson/arm64/base.h */ +/* including simdjson/arm64/intrinsics.h: #include "simdjson/arm64/intrinsics.h" */ +/* begin file simdjson/arm64/intrinsics.h */ +#ifndef SIMDJSON_ARM64_INTRINSICS_H +#define SIMDJSON_ARM64_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(uint8x16_t) <= simdjson::SIMDJSON_PADDING, "insufficient padding for arm64"); + +#endif // SIMDJSON_ARM64_INTRINSICS_H +/* end file simdjson/arm64/intrinsics.h */ +/* including simdjson/arm64/bitmanipulation.h: #include "simdjson/arm64/bitmanipulation.h" */ +/* begin file simdjson/arm64/bitmanipulation.h */ +#ifndef SIMDJSON_ARM64_BITMANIPULATION_H +#define SIMDJSON_ARM64_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +// We sometimes call leading_zeroes on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +// Applies only when SIMDJSON_PREFER_REVERSE_BITS is defined and true. +// (See below.) +SIMDJSON_NO_SANITIZE_UNDEFINED +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); +} + + +#if defined(__GNUC__) // catches clang and gcc +/** + * ARM has a fast 64-bit "bit reversal function" that is handy. However, + * it is not generally available as an intrinsic function under Visual + * Studio (though this might be changing). Even under clang/gcc, we + * apparently need to invoke inline assembly. + */ +/* + * We use SIMDJSON_PREFER_REVERSE_BITS as a hint that algorithms that + * work well with bit reversal may use it. + */ +#define SIMDJSON_PREFER_REVERSE_BITS 1 + +/* reverse the bits */ +simdjson_inline uint64_t reverse_bits(uint64_t input_num) { + uint64_t rev_bits; + __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); + return rev_bits; +} + +/** + * Flips bit at index 63 - lz. Thus if you have 'leading_zeroes' leading zeroes, + * then this will set to zero the leading bit. It is possible for leading_zeroes to be + * greating or equal to 63 in which case we trigger undefined behavior, but the output + * of such undefined behavior is never used. + **/ +SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { + return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); +} + +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BITMANIPULATION_H +/* end file simdjson/arm64/bitmanipulation.h */ +/* including simdjson/arm64/bitmask.h: #include "simdjson/arm64/bitmask.h" */ +/* begin file simdjson/arm64/bitmask.h */ +#ifndef SIMDJSON_ARM64_BITMASK_H +#define SIMDJSON_ARM64_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + ///////////// + // We could do this with PMULL, but it is apparently slow. + // + //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension + //return vmull_p64(-1ULL, bitmask); + //#else + // Analysis by @sebpop: + // When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out + // in between other vector code, so effectively the extra cycles of the sequence do not matter + // because the GPR units are idle otherwise and the critical path is on the FP side. + // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) + // and FP->GPR (2 cycles on N1 and 5 cycles on A72.) + /////////// + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif +/* end file simdjson/arm64/bitmask.h */ +/* including simdjson/arm64/numberparsing_defs.h: #include "simdjson/arm64/numberparsing_defs.h" */ +/* begin file simdjson/arm64/numberparsing_defs.h */ +#ifndef SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +#define SIMDJSON_ARM64_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#if SIMDJSON_REGULAR_VISUAL_STUDIO && SIMDJSON_IS_ARM64 +// __umulh requires intrin.h +#include +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO && SIMDJSON_IS_ARM64 + +namespace simdjson { +namespace arm64 { +namespace numberparsing { + +// we don't have SSE, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace arm64 +} // namespace simdjson + +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 +#else +#define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif +#endif + +#endif // SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +/* end file simdjson/arm64/numberparsing_defs.h */ +/* including simdjson/arm64/simd.h: #include "simdjson/arm64/simd.h" */ +/* begin file simdjson/arm64/simd.h */ +#ifndef SIMDJSON_ARM64_SIMD_H +#define SIMDJSON_ARM64_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace simd { + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +namespace { +// Start of private section with Visual Studio workaround + + +#ifndef simdjson_make_uint8x16_t +#define simdjson_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ + x13, x14, x15, x16) \ + ([=]() { \ + uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_u8(array); \ + }()) +#endif +#ifndef simdjson_make_int8x16_t +#define simdjson_make_int8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ + x13, x14, x15, x16) \ + ([=]() { \ + int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_s8(array); \ + }()) +#endif + +#ifndef simdjson_make_uint8x8_t +#define simdjson_make_uint8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + uint8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1_u8(array); \ + }()) +#endif +#ifndef simdjson_make_int8x8_t +#define simdjson_make_int8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + int8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1_s8(array); \ + }()) +#endif +#ifndef simdjson_make_uint16x8_t +#define simdjson_make_uint16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + uint16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1q_u16(array); \ + }()) +#endif +#ifndef simdjson_make_int16x8_t +#define simdjson_make_int16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ + ([=]() { \ + int16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ + return vld1q_s16(array); \ + }()) +#endif + +// End of private section with Visual Studio workaround +} // namespace +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO + + + template + struct simd8; + + // + // Base class of simd8 and simd8, both of which use uint8x16_t internally. + // + template> + struct base_u8 { + uint8x16_t value; + static const int SIZE = sizeof(value); + + // Conversion from/to SIMD register + simdjson_inline base_u8(const uint8x16_t _value) : value(_value) {} + simdjson_inline operator const uint8x16_t&() const { return this->value; } + simdjson_inline operator uint8x16_t&() { return this->value; } + + // Bit operations + simdjson_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } + simdjson_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } + simdjson_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } + simdjson_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_u8(prev_chunk, *this, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base_u8 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + static simdjson_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // False constructor + simdjson_inline simd8() : simd8(vdupq_n_u8(0)) {} + // Splat constructor + simdjson_inline simd8(bool _value) : simd8(splat(_value)) {} + + // We return uint32_t instead of uint16_t because that seems to be more efficient for most + // purposes (cutting it down to uint16_t costs performance in some compilers). + simdjson_inline uint32_t to_bitmask() const { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = simdjson_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + auto minput = *this & bit_mask; + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); + } + simdjson_inline bool any() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } + }; + + // Unsigned bytes + template<> + struct simd8: base_u8 { + static simdjson_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } + static simdjson_inline uint8x16_t zero() { return vdupq_n_u8(0); } + static simdjson_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization +#if SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(simdjson_make_uint8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(uint8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-specific operations + simdjson_inline uint8_t max_val() const { return vmaxvq_u8(*this); } + simdjson_inline uint8_t min_val() const { return vminvq_u8(*this); } + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_u8(*this, other); } + simdjson_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } + simdjson_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } + // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } + // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } + + // Bit-specific operations + simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } + simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } + template + simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } + template + simdjson_inline simd8 shl() const { return vshlq_n_u8(*this, N); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint16_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; + uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); + // we increment by 0x08 the second half of the mask +#if SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x16_t inc = simdjson_make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + shufmask = vaddq_u8(shufmask, inc); + // this is the version "nearly pruned" + uint8x16_t pruned = vqtbl1q_u8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + uint8x16_t compactmask = vld1q_u8(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); + vst1q_u8(reinterpret_cast(output), answer); + } + + // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a + // bitset) to output1, then those corresponding to a 0 in the high half to output2. + template + simdjson_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { + using internal::thintable_epi8; + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); + uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); + // we increment by 0x08 the second half of the mask +#if SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x8_t inc = simdjson_make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + compactmask2 = vadd_u8(compactmask2, inc); + // store each result (with the second store possibly overlapping the first) + vst1_u8((uint8_t*)output1, vqtbl1_u8(*this, compactmask1)); + vst1_u8((uint8_t*)output2, vqtbl1_u8(*this, compactmask2)); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { + return vqtbl1q_u8(*this, simd8(original)); + } + }; + + // Signed bytes + template<> + struct simd8 { + int8x16_t value; + + static simdjson_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } + static simdjson_inline simd8 zero() { return vdupq_n_s8(0); } + static simdjson_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } + + // Conversion from/to SIMD register + simdjson_inline simd8(const int8x16_t _value) : value{_value} {} + simdjson_inline operator const int8x16_t&() const { return this->value; } + simdjson_inline operator int8x16_t&() { return this->value; } + + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization +#if SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(simdjson_make_int8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(int8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } + + // Explicit conversion to/from unsigned + // + // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type. + // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14 + // and relatively ugly and hard to read. +#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} +#endif + simdjson_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } + + // Math + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } + simdjson_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_s8(prev_chunk, *this, 16 - N); + } + + // Perform a lookup assuming no value is larger than 16 + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { + return vqtbl1q_s8(*this, simd8(original)); + } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); + // compute the prefix sum of the popcounts of each byte + uint64_t offsets = popcounts * 0x0101010101010101; + this->chunks[0].compress_halves(uint16_t(mask), output, &output[popcounts & 0xFF]); + this->chunks[1].compress_halves(uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF], &output[(offsets >> 16) & 0xFF]); + this->chunks[2].compress_halves(uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF], &output[(offsets >> 32) & 0xFF]); + this->chunks[3].compress_halves(uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF], &output[(offsets >> 48) & 0xFF]); + return offsets >> 56; + } + + simdjson_inline uint64_t to_bitmask() const { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = simdjson_make_uint8x16_t( + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + ); +#else + const uint8x16_t bit_mask = { + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + }; +#endif + // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one. + uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask); + uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_SIMD_H +/* end file simdjson/arm64/simd.h */ +/* including simdjson/arm64/stringparsing_defs.h: #include "simdjson/arm64/stringparsing_defs.h" */ +/* begin file simdjson/arm64/stringparsing_defs.h */ +#ifndef SIMDJSON_ARM64_STRINGPARSING_DEFS_H +#define SIMDJSON_ARM64_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_STRINGPARSING_DEFS_H +/* end file simdjson/arm64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/arm64/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for arm64: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for arm64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) +#error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! +#endif + +// Stuff other things depend on +/* including simdjson/generic/ondemand/base.h for arm64: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +/** + * A fast, simple, DOM-like interface that parses JSON as you use it. + * + * Designed for maximum speed and a lower memory profile. + */ +namespace ondemand { + +/** Represents the depth of a JSON value (number of nested arrays/objects). */ +using depth_t = int32_t; + +/** @copydoc simdjson::arm64::number_type */ +using number_type = simdjson::arm64::number_type; + +/** @private Position in the JSON buffer indexes */ +using token_position = const uint32_t *; + +class array; +class array_iterator; +class document; +class document_reference; +class document_stream; +class field; +class json_iterator; +enum class json_type; +struct number; +class object; +class object_iterator; +class parser; +class raw_json_string; +class token_iterator; +class value; +class value_iterator; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H +/* end file simdjson/generic/ondemand/base.h for arm64 */ +/* including simdjson/generic/ondemand/deserialize.h for arm64: #include "simdjson/generic/ondemand/deserialize.h" */ +/* begin file simdjson/generic/ondemand/deserialize.h for arm64 */ +#if SIMDJSON_SUPPORTS_DESERIALIZATION + +#ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_ONDEMAND_DESERIALIZE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +namespace simdjson { + +namespace tag_invoke_fn_ns { +void tag_invoke(); + +struct tag_invoke_fn { + template + requires requires(Tag tag, Args &&...args) { + tag_invoke(std::forward(tag), std::forward(args)...); + } + constexpr auto operator()(Tag tag, Args &&...args) const + noexcept(noexcept(tag_invoke(std::forward(tag), + std::forward(args)...))) + -> decltype(tag_invoke(std::forward(tag), + std::forward(args)...)) { + return tag_invoke(std::forward(tag), std::forward(args)...); + } +}; +} // namespace tag_invoke_fn_ns + +inline namespace tag_invoke_ns { +inline constexpr tag_invoke_fn_ns::tag_invoke_fn tag_invoke = {}; +} // namespace tag_invoke_ns + +template +concept tag_invocable = requires(Tag tag, Args... args) { + tag_invoke(std::forward(tag), std::forward(args)...); +}; + +template +concept nothrow_tag_invocable = + tag_invocable && requires(Tag tag, Args... args) { + { + tag_invoke(std::forward(tag), std::forward(args)...) + } noexcept; + }; + +template +using tag_invoke_result = + std::invoke_result; + +template +using tag_invoke_result_t = + std::invoke_result_t; + +template using tag_t = std::decay_t; + + +struct deserialize_tag; + +/// These types are deserializable in a built-in way +template struct is_builtin_deserializable : std::false_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; + +template +concept is_builtin_deserializable_v = is_builtin_deserializable::value; + +template +concept custom_deserializable = tag_invocable; + +template +concept deserializable = custom_deserializable || is_builtin_deserializable_v; + +template +concept nothrow_custom_deserializable = nothrow_tag_invocable; + +// built-in types are noexcept and if an error happens, the value simply gets ignored and the error is returned. +template +concept nothrow_deserializable = nothrow_custom_deserializable || is_builtin_deserializable_v; + +/// Deserialize Tag +inline constexpr struct deserialize_tag { + using value_type = arm64::ondemand::value; + using document_type = arm64::ondemand::document; + using document_reference_type = arm64::ondemand::document_reference; + + // Customization Point for value + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for document + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for document reference + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + +} deserialize{}; + +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_DESERIALIZE_H +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + +/* end file simdjson/generic/ondemand/deserialize.h for arm64 */ +/* including simdjson/generic/ondemand/value_iterator.h for arm64: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * Iterates through a single JSON value at a particular depth. + * + * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects + * the caller to call the right ones. + * + * @private This is not intended for external use. + */ +class value_iterator { +protected: + /** The underlying JSON iterator */ + json_iterator *_json_iter{}; + /** The depth of this value */ + depth_t _depth{}; + /** + * The starting token index for this value + */ + token_position _start_position{}; + +public: + simdjson_inline value_iterator() noexcept = default; + + /** + * Denote that we're starting a document. + */ + simdjson_inline void start_document() noexcept; + + /** + * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. + * + * Optimized for scalars. + */ + simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is at the start of the value + */ + simdjson_inline bool at_start() const noexcept; + + /** + * Tell whether the value is open--if the value has not been used, or the array/object is still open. + */ + simdjson_inline bool is_open() const noexcept; + + /** + * Tell whether the value is at an object's first field (just after the {). + */ + simdjson_inline bool at_first_field() const noexcept; + + /** + * Abandon all iteration. + */ + simdjson_inline void abandon() noexcept; + + /** + * Get the child value as a value_iterator. + */ + simdjson_inline value_iterator child_value() const noexcept; + + /** + * Get the depth of this value. + */ + simdjson_inline int32_t depth() const noexcept; + + /** + * Get the JSON type of this value. + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() const noexcept; + + /** + * @addtogroup object Object iteration + * + * Methods to iterate and find object fields. These methods generally *assume* the value is + * actually an object; the caller is responsible for keeping track of that fact. + * + * @{ + */ + + /** + * Start an object iteration. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + */ + simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; + /** + * Start an object iteration from the root. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; + /** + * Checks whether an object could be started from the root. May be called by start_root_object. + * + * @returns SUCCESS if it is possible to safely start an object from the root (document level). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; + /** + * Start an object iteration after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. + */ + simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; + + /** + * Moves to the next field in an object. + * + * Looks for , and }. If } is found, the object is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return whether there is another field in the object. + * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; + + /** + * Get the current field's key. + */ + simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; + + /** + * Pass the : in the field and move to its value. + */ + simdjson_warn_unused simdjson_inline error_code field_value() noexcept; + + /** + * Find the next field with the given key. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; + + /** + * Find the next field with the given key, *without* unescaping. This assumes object order: it + * will not find the field if it was already passed when looking for some *other* field. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + + /** + * Find the field with the given key without regard to order, and *without* unescaping. + * + * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + + /** @} */ + + /** + * @addtogroup array Array iteration + * Methods to iterate over array elements. These methods generally *assume* the value is actually + * an object; the caller is responsible for keeping track of that fact. + * @{ + */ + + /** + * Check for an opening [ and start an array iteration. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + */ + simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; + /** + * Check for an opening [ and start an array iteration while at the root. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; + /** + * Checks whether an array could be started from the root. May be called by start_root_array. + * + * @returns SUCCESS if it is possible to safely start an array from the root (document level). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; + /** + * Start an array iteration, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. + */ + simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; + + /** + * Moves to the next element in an array. + * + * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return Whether there is another element in the array. + * @error TAPE_ERROR If there is a comma missing between elements. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; + + /** + * Get a child value iterator. + */ + simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; + + /** @} */ + + /** + * @defgroup scalar Scalar values + * @addtogroup scalar + * @{ + */ + + simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; + simdjson_warn_unused simdjson_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; + + simdjson_inline error_code error() const noexcept; + simdjson_inline uint8_t *&string_buf_loc() noexcept; + simdjson_inline const json_iterator &json_iter() const noexcept; + simdjson_inline json_iterator &json_iter() noexcept; + + simdjson_inline void assert_is_valid() const noexcept; + simdjson_inline bool is_valid() const noexcept; + + /** @} */ +protected: + /** + * Restarts an array iteration. + * @returns Whether the array has any elements (returns false for empty). + */ + simdjson_inline simdjson_result reset_array() noexcept; + /** + * Restarts an object iteration. + * @returns Whether the object has any fields (returns false for empty). + */ + simdjson_inline simdjson_result reset_object() noexcept; + /** + * move_at_start(): moves us so that we are pointing at the beginning of + * the container. It updates the index so that at_start() is true and it + * syncs the depth. The user can then create a new container instance. + * + * Usage: used with value::count_elements(). + **/ + simdjson_inline void move_at_start() noexcept; + + /** + * move_at_container_start(): moves us so that we are pointing at the beginning of + * the container so that assert_at_container_start() passes. + * + * Usage: used with reset_array() and reset_object(). + **/ + simdjson_inline void move_at_container_start() noexcept; + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + + simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; + simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + simdjson_inline const uint8_t *peek_start() const noexcept; + simdjson_inline uint32_t peek_start_length() const noexcept; + simdjson_inline uint32_t peek_root_length() const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... + * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean. + * + * If, instead, you just stand your ground until it is content that you know, then + * you will only even move beyond the '[' if the user tells you that you have an + * array. So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone. + */ + + simdjson_inline void advance_scalar(const char *type) noexcept; + simdjson_inline void advance_root_scalar(const char *type) noexcept; + simdjson_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * + * @return The current token (the one left behind). + * @error TAPE_ERROR If the document ended early. + */ + simdjson_inline simdjson_result advance_to_value() noexcept; + + simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + + simdjson_inline bool is_at_start() const noexcept; + /** + * is_at_iterator_start() returns true on an array or object after it has just been + * created, whether the instance is empty or not. + * + * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) + */ + simdjson_inline bool is_at_iterator_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. + */ + simdjson_inline bool is_at_key() const noexcept; + + inline void assert_at_start() const noexcept; + inline void assert_at_container_start() const noexcept; + inline void assert_at_root() const noexcept; + inline void assert_at_child() const noexcept; + inline void assert_at_next() const noexcept; + inline void assert_at_non_root_start() const noexcept; + + /** Get the starting position of this value */ + simdjson_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + friend class document; + friend class object; + friend class array; + friend class value; + friend class field; +}; // value_iterator + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H +/* end file simdjson/generic/ondemand/value_iterator.h for arm64 */ +/* including simdjson/generic/ondemand/value.h for arm64: #include "simdjson/generic/ondemand/value.h" */ +/* begin file simdjson/generic/ondemand/value.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { + +namespace arm64 { +namespace ondemand { +/** + * An ephemeral JSON value returned during iteration. It is only valid for as long as you do + * not access more data in the JSON document. + */ +class value { +public: + /** + * Create a new invalid value. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline value() noexcept = default; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + #if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif + } + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() noexcept; + + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() noexcept; + + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + + /** + * Cast this JSON value (inside string) to a unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * In some instances, you may want to allow replacement of invalid Unicode sequences. + * You may do so by passing the allow_replacement parameter as true. In the following + * example, the string "431924697b\udff0L\u0001Y" is not valid Unicode. By passing true + * to get_string, we allow the replacement of the invalid Unicode sequences with the Unicode + * replacement character (U+FFFD). + * + * simdjson::ondemand::parser parser; + * auto json = R"({"deviceId":"431924697b\udff0L\u0001Y"})"_padded; + * simdjson::ondemand::document doc = parser.iterate(json); + * auto view = doc["deviceId"].get_string(true); + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. + * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + + /** + * Cast this JSON value to a "wobbly" string. + * + * The string is may not be a valid UTF-8 string. + * See https://simonsapin.github.io/wtf-8/ + * + * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.). + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); +#endif + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + * + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * Performance hint: You should only call count_elements() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method on the object instance. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field as not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_result operator[](int) noexcept = delete; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @return The type of JSON value (json_type::array, json_type::object, json_type::string, + * json_type::number, json_type::boolean, or json_type::null). + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the value is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the value is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the value is a negative number. + * + * @returns true if the number if negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the value is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * Performance note: if you call this function systematically + * before parsing a number, you may have fallen for a performance + * anti-pattern. + * + * @returns true if the number if negative. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808. + * get_number_type() is number_type::big_integer for integers that do not fit in 64 bits, + * in which case the digit_count is set to the length of the big integer string. + * Otherwise, get_number_type() has value number_type::floating_point_number. + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * For integers that do not fit in 64 bits, the function returns BIGINT_ERROR error code. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + * + * Performance note: this is designed with performance in mind. When + * calling 'get_number()', you scan the number string only once, determining + * efficiently the type and storing it in an efficient manner. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. However, if this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view is guaranteed to be + * a non-space token. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + * + * See also value::raw_json(). + */ + simdjson_inline std::string_view raw_json_token() noexcept; + + /** + * Get a string_view pointing at this value in the JSON document. + * If this element is an array or an object, it consumes the array or the object + * and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + * If this element is a scalar (string, number, Boolean, null), it returns what + * raw_json_token() would return. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + simdjson_inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. + * + * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not + * standardized (by RFC 6901). We provide some experimental support for JSON pointers + * on non-document instances. Yet it is not the case when calling at_pointer on an array + * or an object instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + + +protected: + /** + * Create a value. + */ + simdjson_inline value(const value_iterator &iter) noexcept; + + /** + * Skip this value, allowing iteration to continue. + */ + simdjson_inline void skip() noexcept; + + /** + * Start a value at the current position. + * + * (It should already be started; this is just a self-documentation method.) + */ + static simdjson_inline value start(const value_iterator &iter) noexcept; + + /** + * Resume a value. + */ + static simdjson_inline value resume(const value_iterator &iter) noexcept; + + /** + * Get the object, starting or resuming it as necessary + */ + simdjson_inline simdjson_result start_or_resume_object() noexcept; + + // simdjson_inline void log_value(const char *type) const noexcept; + // simdjson_inline void log_error(const char *message) const noexcept; + + value_iterator iter{}; + + friend class document; + friend class array_iterator; + friend class field; + friend class object; + friend struct simdjson_result; + friend struct simdjson_result; + friend class field; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; + + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() noexcept; + + template simdjson_inline error_code get(T &out) noexcept; + +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator arm64::ondemand::array() noexcept(false); + simdjson_inline operator arm64::ondemand::object() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator arm64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field as not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_result operator[](int) noexcept = delete; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + */ + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + + /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + + /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ + simdjson_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H +/* end file simdjson/generic/ondemand/value.h for arm64 */ +/* including simdjson/generic/ondemand/logger.h for arm64: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +// Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical +// that the call to the log functions be side-effect free. Thus, for example, you should not +// create temporary std::string instances. +namespace logger { + +enum class log_level : int32_t { + info = 0, + error = 1 +}; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + +// We do not want these functions to be 'really inlined' since real inlining is +// for performance purposes and if you are using the loggers, you do not care about +// performance (or should not). +static inline void log_headers() noexcept; +// If args are provided, title will be treated as format string +template +static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +template +static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; +static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; + +static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; +static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +} // namespace logger +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H +/* end file simdjson/generic/ondemand/logger.h for arm64 */ +/* including simdjson/generic/ondemand/token_iterator.h for arm64: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) + * detected by stage 1. + * + * @private This is not intended for external use. + */ +class token_iterator { +public: + /** + * Create a new invalid token_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline token_iterator() noexcept = default; + simdjson_inline token_iterator(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator(const token_iterator &other) noexcept = default; + simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + + /** + * Advance to the next token (returning the current one). + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Reports the current offset in bytes from the start of the underlying buffer. + */ + simdjson_inline uint32_t current_offset() const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token (start of the document). + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Return the current index. + */ + simdjson_inline token_position position() const noexcept; + /** + * Reset to a previously saved index. + */ + simdjson_inline void set_position(token_position target_position) noexcept; + + // NOTE: we don't support a full C++ iterator interface, because we expect people to make + // different calls to advance the iterator based on *their own* state. + + simdjson_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_inline bool operator<=(const token_iterator &other) const noexcept; + +protected: + simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; + + /** + * Get the index of the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; + /** + * Get the index of the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline uint32_t peek_index(token_position position) const noexcept; + + const uint8_t *buf{}; + token_position _position{}; + + friend class json_iterator; + friend class value_iterator; + friend class object; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H +/* end file simdjson/generic/ondemand/token_iterator.h for arm64 */ +/* including simdjson/generic/ondemand/json_iterator.h for arm64: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * Iterates through JSON tokens, keeping track of depth and string buffer. + * + * @private This is not intended for external use. + */ +class json_iterator { +protected: + token_iterator token{}; + ondemand::parser *parser{}; + /** + * Next free location in the string buffer. + * + * Used by raw_json_string::unescape() to have a place to unescape strings to. + */ + uint8_t *_string_buf_loc{}; + /** + * JSON error, if there is one. + * + * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. + */ + error_code error{SUCCESS}; + /** + * Depth of the current token in the JSON. + * + * - 0 = finished with document + * - 1 = document root value (could be [ or {, not yet known) + * - 2 = , or } inside root array/object + * - 3 = key or value inside root array/object. + */ + depth_t _depth{}; + /** + * Beginning of the document indexes. + * Normally we have root == parser->implementation->structural_indexes.get() + * but this may differ, especially in streaming mode (where we have several + * documents); + */ + token_position _root{}; + /** + * Normally, a json_iterator operates over a single document, but in + * some cases, we may have a stream of documents. This attribute is meant + * as meta-data: the json_iterator works the same irrespective of the + * value of this attribute. + */ + bool _streaming{false}; + +public: + simdjson_inline json_iterator() noexcept = default; + simdjson_inline json_iterator(json_iterator &&other) noexcept; + simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; + simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; + /** + * Skips a JSON value, whether it is a scalar, array or object. + */ + simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; + + /** + * Tell whether the iterator is still at the start + */ + simdjson_inline bool at_root() const noexcept; + + /** + * Tell whether we should be expected to run in streaming + * mode (iterating over many documents). It is pure metadata + * that does not affect how the iterator works. It is used by + * start_root_array() and start_root_object(). + */ + simdjson_inline bool streaming() const noexcept; + + /** + * Get the root value iterator + */ + simdjson_inline token_position root_position() const noexcept; + /** + * Assert that we are at the document depth (== 1) + */ + simdjson_inline void assert_at_document_depth() const noexcept; + /** + * Assert that we are at the root of the document + */ + simdjson_inline void assert_at_root() const noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is live (has not been moved). + */ + simdjson_inline bool is_alive() const noexcept; + + /** + * Abandon this iterator, setting depth to 0 (as if the document is finished). + */ + simdjson_inline void abandon() noexcept; + + /** + * Advance the current token without modifying depth. + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + + /** + * Returns true if there is a single token in the index (i.e., it is + * a JSON with a scalar value such as a single number). + * + * @return whether there is a single token + */ + simdjson_inline bool is_single_token() const noexcept; + + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds. + */ + simdjson_inline void assert_valid_position(token_position position) const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get a pointer to the current location in the input buffer. + * + * This is not null-terminated; it is a view into the JSON. + * + * You may be pointing outside of the input buffer: it is not generally + * safe to dereference this pointer. + */ + simdjson_inline const uint8_t *unsafe_pointer() const noexcept; + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token to retrieve. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Get the JSON text for the last token in the document. + * + * This is not null-terminated; it is a view into the JSON. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek_last() const noexcept; + + /** + * Ascend one level. + * + * Validates that the depth - 1 == parent_depth. + * + * @param parent_depth the expected parent depth. + */ + simdjson_inline void ascend_to(depth_t parent_depth) noexcept; + + /** + * Descend one level. + * + * Validates that the new depth == child_depth. + * + * @param child_depth the expected child depth. + */ + simdjson_inline void descend_to(depth_t child_depth) noexcept; + simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + + /** + * Get current depth. + */ + simdjson_inline depth_t depth() const noexcept; + + /** + * Get current (writeable) location in the string buffer. + */ + simdjson_inline uint8_t *&string_buf_loc() noexcept; + + /** + * Report an unrecoverable error, preventing further iteration. + * + * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + /** + * Log error, but don't stop iteration. + * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + + /** + * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with + * N bytes of capacity. It will return false if N is too small (smaller than max_len) of if it is zero. + * The buffer (tmpbuf) is padded with space characters. + */ + simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; + + simdjson_inline token_position position() const noexcept; + /** + * Write the raw_json_string to the string buffer and return a string_view. + * Each raw_json_string should be unescaped once, or else the string buffer might + * overflow. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; + + simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; + + simdjson_inline error_code consume_character(char c) noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline token_position start_position(depth_t depth) const noexcept; + simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; +#endif + + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Updates this json iterator so that it is back at the beginning of the document, + * as if it had just been created. + */ + inline void rewind() noexcept; + /** + * This checks whether the {,},[,] are balanced so that the document + * ends with proper zero depth. This requires scanning the whole document + * and it may be expensive. It is expected that it will be rarely called. + * It does not attempt to match { with } and [ with ]. + */ + inline bool balanced() const noexcept; +protected: + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /// The last token before the end + simdjson_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_inline token_position end_position() const noexcept; + /// The end of the buffer. + simdjson_inline token_position end() const noexcept; + + friend class document; + friend class document_stream; + friend class object; + friend class array; + friend class value; + friend class raw_json_string; + friend class parser; + friend class value_iterator; + friend class field; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +}; // json_iterator + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H +/* end file simdjson/generic/ondemand/json_iterator.h for arm64 */ +/* including simdjson/generic/ondemand/json_type.h for arm64: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * The type of a JSON value. + */ +enum class json_type { + // Start at 1 to catch uninitialized / default values more easily + array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) + object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) + number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) + string, ///< A JSON string ( "a" or "hello world\n" ...) + boolean, ///< A JSON boolean (true or false) + null ///< A JSON null (null) +}; + +/** + * A type representing a JSON number. + * The design of the struct is deliberately straight-forward. All + * functions return standard values with no error check. + */ +struct number { + + /** + * return the automatically determined type of + * the number: number_type::floating_point_number, + * number_type::signed_integer or number_type::unsigned_integer. + * + * enum class number_type { + * floating_point_number=1, /// a binary64 number + * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + * unsigned_integer /// a positive integer larger or equal to 1<<63 + * }; + */ + simdjson_inline ondemand::number_type get_number_type() const noexcept; + /** + * return true if the automatically determined type of + * the number is number_type::unsigned_integer. + */ + simdjson_inline bool is_uint64() const noexcept; + /** + * return the value as a uint64_t, only valid if is_uint64() is true. + */ + simdjson_inline uint64_t get_uint64() const noexcept; + simdjson_inline operator uint64_t() const noexcept; + + /** + * return true if the automatically determined type of + * the number is number_type::signed_integer. + */ + simdjson_inline bool is_int64() const noexcept; + /** + * return the value as a int64_t, only valid if is_int64() is true. + */ + simdjson_inline int64_t get_int64() const noexcept; + simdjson_inline operator int64_t() const noexcept; + + + /** + * return true if the automatically determined type of + * the number is number_type::floating_point_number. + */ + simdjson_inline bool is_double() const noexcept; + /** + * return the value as a double, only valid if is_double() is true. + */ + simdjson_inline double get_double() const noexcept; + simdjson_inline operator double() const noexcept; + + /** + * Convert the number to a double. Though it always succeed, the conversion + * may be lossy if the number cannot be represented exactly. + */ + simdjson_inline double as_double() const noexcept; + + +protected: + /** + * The next block of declaration is designed so that we can call the number parsing + * functions on a number type. They are protected and should never be used outside + * of the core simdjson library. + */ + friend class value_iterator; + template + friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); + template + friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); + /** Store a signed 64-bit value to the number. */ + simdjson_inline void append_s64(int64_t value) noexcept; + /** Store an unsigned 64-bit value to the number. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + /** Store a double value to the number. */ + simdjson_inline void append_double(double value) noexcept; + /** Specifies that the value is a double, but leave it undefined. */ + simdjson_inline void skip_double() noexcept; + /** + * End of friend declarations. + */ + + /** + * Our attributes are a union type (size = 64 bits) + * followed by a type indicator. + */ + union { + double floating_point_number; + int64_t signed_integer; + uint64_t unsigned_integer; + } payload{0}; + number_type type{number_type::signed_integer}; +}; + +/** + * Write the JSON type to the output stream + * + * @param out The output stream. + * @param type The json_type. + */ +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; + +#if SIMDJSON_EXCEPTIONS +/** + * Send JSON type to an output stream. + * + * @param out The output stream. + * @param type The json_type. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). + */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); +#endif + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H +/* end file simdjson/generic/ondemand/json_type.h for arm64 */ +/* including simdjson/generic/ondemand/raw_json_string.h for arm64: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * A string escaped per JSON rules, terminated with quote ("). They are used to represent + * unescaped keys inside JSON documents. + * + * (In other words, a pointer to the beginning of a string, just after the start quote, inside a + * JSON file.) + * + * This class is deliberately simplistic and has little functionality. You can + * compare a raw_json_string instance with an unescaped C string, but + * that is nearly all you can do. + * + * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own + * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser + * instance. Doing so requires you to have a sufficiently large buffer. + * + * The raw_json_string instances originate typically from field instance which in turn represent + * key-value pairs from object instances. From a field instance, you get the raw_json_string + * instance by calling key(). You can, if you want a more usable string_view instance, call + * the unescaped_key() method on the field instance. You may also create a raw_json_string from + * any other string value, with the value.get_raw_json_string() method. Again, you can get + * a more usable string_view instance by calling get_string(). + * + */ +class raw_json_string { +public: + /** + * Create a new invalid raw_json_string. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline raw_json_string() noexcept = default; + + /** + * Create a new invalid raw_json_string pointed at the given location in the JSON. + * + * The given location must be just *after* the beginning quote (") in the JSON file. + * + * It *must* be terminated by a ", and be a valid JSON string. + */ + simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; + /** + * Get the raw pointer to the beginning of the string in the JSON (just after the "). + * + * It is possible for this function to return a null pointer if the instance + * has outlived its existence. + */ + simdjson_inline const char * raw() const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done) on target.size() characters, + * and if the raw_json_string instance has a quote character at byte index target.size(). + * We never read more than length + 1 bytes in the raw_json_string instance. + * If length is smaller than target.size(), this will return false. + * + * The std::string_view instance may contain any characters. However, the caller + * is responsible for setting length so that length bytes may be read in the + * raw_json_string. + * + * Performance: the comparison may be done using memcmp which may be efficient + * for long strings. + */ + simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The std::string_view instance should not contain unescaped quote characters: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * Performance: the comparison is done byte-by-byte which might be inefficient for + * long strings. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The provided C string should not contain an unescaped quote character: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(const char* target) const noexcept; + + /** + * Returns true if target is free from unescaped quote. If target is known at + * compile-time, we might expect the computation to happen at compile time with + * many compilers (not all!). + */ + static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + +private: + + + /** + * This will set the inner pointer to zero, effectively making + * this instance unusable. + */ + simdjson_inline void consume() noexcept { buf = nullptr; } + + /** + * Checks whether the inner pointer is non-null and thus usable. + */ + simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result will be a valid UTF-8. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result may not be a valid UTF-8. https://simonsapin.github.io/wtf-8/ + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; + const uint8_t * buf{}; + friend class object; + friend class field; + friend class parser; + friend struct simdjson_result; +}; + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; + +/** + * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible + * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. + */ +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; + + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private + + simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(arm64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(arm64::ondemand::json_iterator &iter) const noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H +/* end file simdjson/generic/ondemand/raw_json_string.h for arm64 */ +/* including simdjson/generic/ondemand/parser.h for arm64: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value + * in the future. + */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * A JSON fragment iterator. + * + * This holds the actual iterator as well as the buffer for writing strings. + */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + */ + inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + + inline parser(parser &&other) noexcept = default; + simdjson_inline parser(const parser &other) = delete; + simdjson_inline parser &operator=(const parser &other) = delete; + simdjson_inline parser &operator=(parser &&other) noexcept = default; + + /** Deallocate the JSON parser. */ + inline ~parser() noexcept = default; + + /** + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * document doc = parser.iterate(json); + * + * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. + * Otherwise the iterate method may return an error. In particular, the whole input should be + * valid: we do not attempt to tolerate incorrect content either before or after a JSON + * document. If there is a UTF-8 BOM, the parser skips it. + * + * ### IMPORTANT: Validate what you use + * + * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to + * iterate does not parse and validate the whole document. + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * ### std::string references + * + * If you pass a mutable std::string reference (std::string&), the parser will seek to extend + * its capacity to SIMDJSON_PADDING bytes beyond the end of the string. + * + * Whenever you pass an std::string reference, the parser will access the bytes beyond the end of + * the string but before the end of the allocated memory (std::string::capacity()). + * If you are using a sanitizer that checks for reading uninitialized bytes or std::string's + * container-overflow checks, you may encounter sanitizer warnings. + * You can safely ignore these warnings. Or you can call simdjson::pad(std::string&) to pad the + * string with SIMDJSON_PADDING spaces: this function returns a simdjson::padding_string_view + * which can be be passed to the parser's iterate function: + * + * std::string json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"; + * document doc = parser.iterate(simdjson::pad(json)); + * + * @param json The JSON to parse. + * @param len The length of the JSON. + * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). + * + * @return The document, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = delete; + + /** + * @private + * + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * json_iterator doc = parser.iterate(json); + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * The ondemand::document instance holds the iterator. The document must remain in scope + * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * + * @return The iterator, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; + + + /** + * Parse a buffer containing many JSON documents. + * + * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; + * ondemand::parser parser; + * ondemand::document_stream docs = parser.iterate_many(json); + * for (auto & doc : docs) { + * std::cout << doc["foo"] << std::endl; + * } + * // Prints 1 2 3 + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * + * The caller is responsabile to ensure that the input string data remains unchanged and is + * not deleted during the loop. + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with ASCII whitespace. + * + * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). + * If there is a UTF-8 BOM, the parser skips it. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may impact negatively the + * performance. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to 10MB, which has been a reasonable sweet spot in our tests. + * @param allow_comma_separated (defaults on false) This allows a mode where the documents are + * separated by commas instead of whitespace. It comes with a performance + * penalty because the entire document is indexed at once (and the document must be + * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter + * is effectively ignored, as it is set to at least the document size. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + + /** The capacity of this parser (the largest document it can process). */ + simdjson_pure simdjson_inline size_t capacity() const noexcept; + /** The maximum capacity of this parser (the largest document it is allowed to process). */ + simdjson_pure simdjson_inline size_t max_capacity() const noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + /** + * The maximum depth of this parser (the most deeply nested objects and arrays it can process). + * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + */ + simdjson_pure simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + + #ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; + #else + /** + * When SIMDJSON_THREADS_ENABLED is not defined, the parser instance cannot use threads. + */ + bool threaded{false}; + #endif + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result must be valid UTF-8. + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; + +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * the string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and usafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + +private: + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + size_t _capacity{0}; + size_t _max_capacity; + size_t _max_depth{DEFAULT_MAX_DEPTH}; + std::unique_ptr string_buf{}; +#if SIMDJSON_DEVELOPMENT_CHECKS + std::unique_ptr start_positions{}; +#endif + + friend class json_iterator; + friend class document_stream; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H +/* end file simdjson/generic/ondemand/parser.h for arm64 */ + +// All other declarations +/* including simdjson/generic/ondemand/array.h for arm64: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * A forward-only JSON array. + */ +class array { +public: + /** + * Create a new invalid array. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline array() noexcept = default; + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. Note that count_elements() does not validate the JSON values, + * only the structure of the array. + * + * To check that an array is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the beginning of the array and checks whether the + * array is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result is_empty() & noexcept; + /** + * Reset the iterator so that we are pointing back at the + * beginning of the array. You should still consume values only once even if you + * can iterate through the array more than once. If you unescape a string + * within the array more than once, you have unsafe code. Note that rewinding + * an array means that you may need to reparse it anew: it is not a free + * operation. + * + * @returns true if the array contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/0/foo/a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an array + * instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; +protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + + /** + * Begin array iteration. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + */ + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + /** + * Begin array iteration from the root. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + * @error TAPE_ERROR if there is no closing ] at the end of the document. + */ + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + /** + * Begin array iteration. + * + * This version of the method should be called after the initial [ has been verified, and is + * intended for use by switch statements that check the type of a value. + * + * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. + */ + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + + /** + * Create an array at the given Internal array creation. Call array::start() or array::started() instead of this. + * + * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() + * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* + * into the resulting array. + */ + simdjson_inline array(const value_iterator &iter) noexcept; + + /** + * Iterator marking current position. + * + * iter.is_alive() == false indicates iteration is complete. + */ + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; + friend struct simdjson_result; + friend class array_iterator; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + inline simdjson_result count_elements() & noexcept; + inline simdjson_result is_empty() & noexcept; + inline simdjson_result reset() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H +/* end file simdjson/generic/ondemand/array.h for arm64 */ +/* including simdjson/generic/ondemand/array_iterator.h for arm64: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * A forward-only JSON array. + * + * This is an input_iterator, meaning: + * - It is forward-only + * - * must be called exactly once per element. + * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) + */ +class array_iterator { +public: + /** Create a new, invalid array iterator. */ + simdjson_inline array_iterator() noexcept = default; + + // + // Iterator interface + // + + /** + * Get the current element. + * + * Part of the std::iterator interface. + */ + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + /** + * Check if we are at the end of the JSON. + * + * Part of the std::iterator interface. + * + * @return true if there are no more elements in the JSON array. + */ + simdjson_inline bool operator==(const array_iterator &) const noexcept; + /** + * Check if there are more elements in the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are more elements in the JSON array. + */ + simdjson_inline bool operator!=(const array_iterator &) const noexcept; + /** + * Move to the next element. + * + * Part of the std::iterator interface. + */ + simdjson_inline array_iterator &operator++() noexcept; + +private: + value_iterator iter{}; + + simdjson_inline array_iterator(const value_iterator &iter) noexcept; + + friend class array; + friend class value; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H +/* end file simdjson/generic/ondemand/array_iterator.h for arm64 */ +/* including simdjson/generic/ondemand/document.h for arm64: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * A JSON document. It holds a json_iterator instance. + * + * Used by tokens to get text, and string buffer location. + * + * You must keep the document around during iteration. + */ +class document { +public: + /** + * Create a new invalid document. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline document() noexcept = default; + simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_inline document(document &&other) noexcept = default; + simdjson_inline document &operator=(const document &other) noexcept = delete; + simdjson_inline document &operator=(document &&other) noexcept = default; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() & noexcept; + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() & noexcept; + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: Calling get_string() twice on the same document is an error. + * + * @param Whether to allow a replacement character for unmatched surrogate pairs. + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. + * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + /** + * Cast this JSON value to a string. + * + * The string is not guaranteed to be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * + * Important: Calling get_wobbly_string() twice on the same document is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is set to 1 (which is the case when building in Debug mode + * by default), and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value if a JSON array or object cannot be found. + * @returns SCALAR_DOCUMENT_AS_VALUE error is the document is a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result get_value() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "Cannot initialize the specified type."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + /** + * @overload template simdjson_result get() & noexcept + * + * We disallow the use tag_invoke CPO on a moved document; it may create UB + * if user uses `ondemand::array` or `ondemand::object` in their custom type. + * + * The member function is still remains specialize-able for compatibility + * reasons, but we completely disallow its use when a tag_invoke customization + * is provided. + */ + template + simdjson_inline simdjson_result get() && +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(!std::is_same::value && !std::is_same::value, "You should never hold either an ondemand::array or ondemand::object without a corresponding ondemand::document being alive; that would be Undefined Behaviour."); + return static_cast(*this).get(); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { +#if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + } + + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.) + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() & noexcept(false); + template + explicit simdjson_deprecated simdjson_inline operator T() && noexcept(false); + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() & noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() & noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is defined, and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value value if a JSON array or object cannot be found. + * @exception SCALAR_DOCUMENT_AS_VALUE error is the document is a scalar (see is_scalar() function). + */ + simdjson_inline operator value() noexcept(false); +#endif + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. Note that count_elements() does not validate the JSON values, + * only the structure of the array. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) & noexcept; + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to + * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the document is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the document is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the document is a negative number. + * + * @returns true if the number if negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the document is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * @returns true if the number if negative. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 and no larger than 18446744073709551615. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 and greater or equal to -9223372036854775808. + * get_number_type() is number_type::big_integer if we have an integer outside + * of those ranges (either larger than 18446744073709551615 or smaller than -9223372036854775808). + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. If this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view may be the padded buffer. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** + * Reset the iterator inside the document instance so we are pointing back at the + * beginning of the document, as if it had just been created. It invalidates all + * values, objects and arrays that you have created so far (including unescaped strings). + */ + inline void rewind() noexcept; + /** + * Returns debugging information. + */ + inline std::string to_debug_string() noexcept; + /** + * Some unrecoverable error conditions may render the document instance unusable. + * The is_alive() method returns true when the document is still suitable. + */ + inline bool is_alive() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Returns true if this document has been fully parsed. + * If you have consumed the whole document and at_end() returns + * false, then there may be trailing content. + */ + inline bool at_end() const noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/\\u00E9") == 123 + * doc.at_pointer((const char*)u8"/\u00E9") returns an error (NO_SUCH_FIELD) + * + * Note that at_pointer() automatically calls rewind between each call. Thus + * all values, objects and arrays that you have created so far (including unescaped strings) + * are invalidated. After calling at_pointer, you need to consume the result: string values + * should be stored in your own variables, arrays should be decoded and stored in your own array-like + * structures and so forth. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_path(".\\u00E9") == 123 + * doc.at_path((const char*)u8".\u00E9") returns an error (NO_SUCH_FIELD) + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the document and returns a string_view instance corresponding to the + * document as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; +protected: + /** + * Consumes the document. + */ + simdjson_inline error_code consume() noexcept; + + simdjson_inline document(ondemand::json_iterator &&iter) noexcept; + simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; + + simdjson_inline value_iterator resume_value_iterator() noexcept; + simdjson_inline value_iterator get_root_value_iterator() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; + + // + // Fields + // + json_iterator iter{}; ///< Current position in the document + static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 + + friend class array_iterator; + friend class value; + friend class ondemand::parser; + friend class object; + friend class array; + friend class field; + friend class token; + friend class document_stream; + friend class document_reference; +}; + + +/** + * A document_reference is a thin wrapper around a document reference instance. + * The document_reference instances are used primarily/solely for streams of JSON + * documents. They differ from document instances when parsing a scalar value + * (a document that is not an array or an object). In the case of a document, + * we expect the document to be fully consumed. In the case of a document_reference, + * we allow trailing content. + */ +class document_reference { +public: + simdjson_inline document_reference() noexcept; + simdjson_inline document_reference(document &d) noexcept; + simdjson_inline document_reference(const document_reference &other) noexcept = default; + simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; + simdjson_inline void rewind() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + + simdjson_inline simdjson_result is_null() noexcept; + template + simdjson_inline simdjson_result get() & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "Cannot initialize the specified type."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + template + simdjson_inline simdjson_result get() && +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(!std::is_same::value && !std::is_same::value, "You should never hold either an ondemand::array or ondemand::object without a corresponding ondemand::document_reference being alive; that would be Undefined Behaviour."); + return static_cast(*this).get(); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { +#if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + } + + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + simdjson_inline operator document&() const noexcept; +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator array() & noexcept(false); + simdjson_inline operator object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + +private: + document *doc{nullptr}; +}; +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; +#if SIMDJSON_EXCEPTIONS + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator arm64::ondemand::array() & noexcept(false); + simdjson_inline operator arm64::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator arm64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator arm64::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool at_end() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + + + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator arm64::ondemand::array() & noexcept(false); + simdjson_inline operator arm64::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator arm64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator arm64::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H +/* end file simdjson/generic/ondemand/document.h for arm64 */ +/* including simdjson/generic/ondemand/document_stream.h for arm64: #include "simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + ondemand::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; + + friend class document_stream; +}; +#endif // SIMDJSON_THREADS_ENABLED + +/** + * A forward-only stream of documents. + * + * Produced by parser::iterate_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * auto error = parser.iterate_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + + /** + * Returns the input size in bytes. + */ + inline size_t size_in_bytes() const noexcept; + + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto & doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + + class iterator { + public: + using value_type = simdjson_result; + using reference = simdjson_result; + using pointer = void; + using difference_type = std::ptrdiff_t; + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline reference operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + simdjson_inline size_t current_index() const noexcept; + + /** + * @private + * + * Gives a view of the current document at the current position. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + * + */ + simdjson_inline std::string_view source() const noexcept; + + /** + * Returns error of the stream (if any). + */ + inline error_code error() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + + friend class document; + friend class document_stream; + friend class json_iterator; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. + * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_inline document_stream( + ondemand::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size, + bool allow_comma_separated + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the parser skips it. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** Move the json_iterator of the document to the location of the next document in the stream. */ + inline void next_document() noexcept; + + /** Get the next document index. */ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; + + // Fields + ondemand::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + bool allow_comma_separated; + /** + * We are going to use just one document instance. The document owns + * the json_iterator. It implies that we only ever pass a reference + * to the document to the users. + */ + document doc{}; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; + + #ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. + */ + ondemand::parser stage1_thread_parser{}; + + friend struct stage1_worker; + #endif // SIMDJSON_THREADS_ENABLED + + friend class parser; + friend class document; + friend class json_iterator; + friend struct simdjson_result; + friend struct simdjson::internal::simdjson_result_base; +}; // document_stream + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H +/* end file simdjson/generic/ondemand/document_stream.h for arm64 */ +/* including simdjson/generic/ondemand/field.h for arm64: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * A JSON field (key/value pair) in an object. + * + * Returned from object iteration. + * + * Extends from std::pair so you can use C++ algorithms that rely on pairs. + */ +class field : public std::pair { +public: + /** + * Create a new invalid field. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline field() noexcept; + + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; + /** + * Get the key as a raw_json_string. Can be used for direct comparison with + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. + */ + simdjson_inline raw_json_string key() const noexcept; + /** + * Get the unprocessed key as a string_view. This includes the quotes and may include + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). + */ + simdjson_inline std::string_view key_raw_json_token() const noexcept; + /** + * Get the key as a string_view. This does not include the quotes and + * the string is unprocessed key so it may contain escape characters + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. + */ + simdjson_inline std::string_view escaped_key() const noexcept; + /** + * Get the field value. + */ + simdjson_inline ondemand::value &value() & noexcept; + /** + * @overload ondemand::value &ondemand::value() & noexcept + */ + simdjson_inline ondemand::value value() && noexcept; + +protected: + simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; + static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; + static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; + friend struct simdjson_result; + friend class object_iterator; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result key_raw_json_token() noexcept; + simdjson_inline simdjson_result escaped_key() noexcept; + simdjson_inline simdjson_result value() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H +/* end file simdjson/generic/ondemand/field.h for arm64 */ +/* including simdjson/generic/ondemand/object.h for arm64: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * A forward-only JSON object field iterator. + */ +class object { +public: + /** + * Create a new invalid object. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a + * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an object + * instance: there is no rewind and no invalidation. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Reset the iterator so that we are pointing back at the + * beginning of the object. You should still consume values only once even if you + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object + * means that you may need to reparse it anew: it is not a free operation. + * + * @returns true if the object contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * This method scans the beginning of the object and checks whether the + * object is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + inline simdjson_result is_empty() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + +protected: + /** + * Go to the end of the object, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_inline object resume(const value_iterator &iter) noexcept; + simdjson_inline object(const value_iterator &iter) noexcept; + + simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; + + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + inline simdjson_result reset() noexcept; + inline simdjson_result is_empty() noexcept; + inline simdjson_result count_fields() & noexcept; + inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H +/* end file simdjson/generic/ondemand/object.h for arm64 */ +/* including simdjson/generic/ondemand/object_iterator.h for arm64: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +class object_iterator { +public: + /** + * Create a new invalid object_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object_iterator() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const object_iterator &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_inline bool operator!=(const object_iterator &) const noexcept; + // Checks for ']' and ',' + simdjson_inline object_iterator &operator++() noexcept; + +private: + /** + * The underlying JSON iterator. + * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object + * is first used, and never changes afterwards. + */ + value_iterator iter{}; + + simdjson_inline object_iterator(const value_iterator &iter) noexcept; + friend struct simdjson_result; + friend class object; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + // Checks for ']' and ',' + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H +/* end file simdjson/generic/ondemand/object_iterator.h for arm64 */ +/* including simdjson/generic/ondemand/serialization.h for arm64: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(arm64::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. It does not + * validate the content. + */ +inline simdjson_result to_json_string(arm64::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(arm64::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(arm64::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +} // namespace simdjson + +/** + * We want to support argument-dependent lookup (ADL). + * Hence we should define operator<< in the namespace + * where the argument (here value, object, etc.) resides. + * Credit: @madhur4127 + * See https://github.com/simdjson/simdjson/issues/1768 + */ +namespace simdjson { namespace arm64 { namespace ondemand { + +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::value x); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +}}} // namespace simdjson::arm64::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H +/* end file simdjson/generic/ondemand/serialization.h for arm64 */ + +// Deserialization for standard types +/* including simdjson/generic/ondemand/std_deserialize.h for arm64: #include "simdjson/generic/ondemand/std_deserialize.h" */ +/* begin file simdjson/generic/ondemand/std_deserialize.h for arm64 */ +#if SIMDJSON_SUPPORTS_DESERIALIZATION + +#ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_ONDEMAND_DESERIALIZE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +template +constexpr bool require_custom_serialization = false; + +////////////////////////////// +// Number deserialization +////////////////////////////// + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + using limits = std::numeric_limits; + + uint64_t x; + SIMDJSON_TRY(val.get_uint64().get(x)); + if (x > (limits::max)()) { + return NUMBER_OUT_OF_RANGE; + } + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + double x; + SIMDJSON_TRY(val.get_double().get(x)); + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + using limits = std::numeric_limits; + + int64_t x; + SIMDJSON_TRY(val.get_int64().get(x)); + if (x > (limits::max)() || x < (limits::min)()) { + return NUMBER_OUT_OF_RANGE; + } + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + +/** + * STL containers have several constructors including one that takes a single + * size argument. Thus, some compilers (Visual Studio) will not be able to + * disambiguate between the size and container constructor. Users should + * explicitly specify the type of the container as needed: e.g., + * doc.get>(). + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::value_type; + static_assert( + deserializable, + "The specified type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the container must default constructible."); + + arm64::ondemand::array arr; + SIMDJSON_TRY(val.get_array().get(arr)); + for (auto v : arr) { + if constexpr (concepts::returns_reference) { + if (auto const err = v.get().get(concepts::emplace_one(out)); + err) { + // If an error occurs, the empty element that we just inserted gets + // removed. We're not using a temp variable because if T is a heavy + // type, we want the valid path to be the fast path and the slow path be + // the path that has errors in it. + if constexpr (requires { out.pop_back(); }) { + static_cast(out.pop_back()); + } + return err; + } + } else { + value_type temp; + if (auto const err = v.get().get(temp); err) { + return err; + } + concepts::emplace_one(out, std::move(temp)); + } + } + return SUCCESS; +} + + +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + arm64::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + + + + +/** + * This CPO (Customization Point Object) will help deserialize into + * smart pointers. + * + * If constructing T is nothrow, this conversion should be nothrow as well since + * we return MEMALLOC if we're not able to allocate memory instead of throwing + * the error message. + * + * @tparam T The type inside the smart pointer + * @tparam ValT document/value type + * @param val document/value + * @param out a reference to the smart pointer + * @return status of the conversion + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::element_type, ValT>) { + using element_type = typename std::remove_cvref_t::element_type; + + // For better error messages, don't use these as constraints on + // the tag_invoke CPO. + static_assert( + deserializable, + "The specified type inside the unique_ptr must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the unique_ptr must default constructible."); + + auto ptr = new (std::nothrow) element_type(); + if (ptr == nullptr) { + return MEMALLOC; + } + SIMDJSON_TRY(val.template get(*ptr)); + out.reset(ptr); + return SUCCESS; +} + +/** + * This CPO (Customization Point Object) will help deserialize into optional types. + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { + using value_type = typename std::remove_cvref_t::value_type; + + static_assert( + deserializable, + "The specified type inside the unique_ptr must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the unique_ptr must default constructible."); + + if (!out) { + out.emplace(); + } + SIMDJSON_TRY(val.template get(out.value())); + return SUCCESS; +} + +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_DESERIALIZE_H +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +/* end file simdjson/generic/ondemand/std_deserialize.h for arm64 */ + +// Inline definitions +/* including simdjson/generic/ondemand/array-inl.h for arm64: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/jsonpathutil.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `{`. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth. at_start is always false. +// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. +// + +simdjson_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. + iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline simdjson_result array::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for arm64 */ +/* including simdjson/generic/ondemand/array_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. + if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::array_iterator &&value +) noexcept + : arm64::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : arm64::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for arm64 */ +/* including simdjson/generic/ondemand/value-inl.h for arm64: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result value::get_object() noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +template +simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { + return iter.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} + +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + + +template<> simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } +template<> simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } +template<> simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } +template<> simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } +template<> simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } +template<> simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } +template<> simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } +template<> simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } +template<> simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline value::operator T() noexcept(false) { + return get(); +} +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); +} + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} + +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); +} + +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result value::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); +} + +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +simdjson_inline simdjson_result value::raw_json() noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: { + ondemand::array array; + SIMDJSON_TRY(get_array().get(array)); + return array.raw_json(); + } + case json_type::object: { + ondemand::object object; + SIMDJSON_TRY(get_object().get(object)); + return object.raw_json(); + } + default: + return raw_json_token(); + } +} + +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} + +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + return false; + } + return true; +} + +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result value::at_path(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + return {}; +} + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} + +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template<> simdjson_inline error_code simdjson_result::get(arm64::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} + +template simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator arm64::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer( + std::string_view json_pointer) noexcept { + if (error()) { + return error(); + } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for arm64 */ +/* including simdjson/generic/ondemand/document-inl.h for arm64: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); +} + +simdjson_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); +} + +inline void document::rewind() noexcept { + iter.rewind(); +} + +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); +} + +inline simdjson_result document::current_location() const noexcept { + return iter.current_location(); +} + +inline int32_t document::current_depth() const noexcept { + return iter.depth(); +} + +inline bool document::at_end() const noexcept { + return iter.at_end(); +} + + +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. + + // It is the convention throughout the code that the macro `SIMDJSON_DEVELOPMENT_CHECKS` determines whether + // we check for OUT_OF_ORDER_ITERATION. Proper on::demand code should never trigger this error. +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.at_root()) { return OUT_OF_ORDER_ITERATION; } +#endif + // assert_at_root() serves two purposes: in Debug mode, whether or not + // SIMDJSON_DEVELOPMENT_CHECKS is set or not, it checks that we are at the root of + // the document (this will typically be redundant). In release mode, it generates + // SIMDJSON_ASSUME statements to allow the compiler to make assumptions. + iter.assert_at_root(); + switch (*iter.peek()) { + case '[': { + // The following lines check that the document ends with ]. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_array(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + case '{': { + // The following lines would check that the document ends with }. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_object(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; + } +} +simdjson_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} + +/** + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. We want to disallow trailing + * content. + * Thus, in several implementations below, we pass a 'true' parameter value to + * a get_root_value_iterator() method: this indicates that we disallow trailing content. + */ + +simdjson_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(true); +} +simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(true); +} +simdjson_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(true); +} +simdjson_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(true); +} +simdjson_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(true); +} +simdjson_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(true); +} +simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(true, allow_replacement); +} +template +simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); +} +simdjson_inline simdjson_result document::get_wobbly_string() noexcept { + return get_root_value_iterator().get_root_wobbly_string(true); +} +simdjson_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(true); +} +simdjson_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(true); +} +simdjson_inline simdjson_result document::is_null() noexcept { + return get_root_value_iterator().is_root_null(true); +} + +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } +template<> simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } +template<> simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } +template<> simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } +template<> simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } +template<> simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } +template<> simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } +template<> simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } +template<> simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } + +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } + +#if SIMDJSON_EXCEPTIONS +template +simdjson_deprecated simdjson_inline document::operator T() && noexcept(false) { return get(); } +template +simdjson_inline document::operator T() & noexcept(false) { return get(); } +simdjson_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_inline document::operator object() & noexcept(false) { return get_object(); } +simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an object, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline error_code document::consume() noexcept { + bool scalar = false; + auto error = is_scalar().get(scalar); + if(error) { return error; } + if(scalar) { + iter.return_current_and_advance(); + return SUCCESS; + } + error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + // For more speed, we could do: + // return iter.is_single_token(); + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_inline error_code simdjson_result::get(arm64::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code simdjson_result::get(arm64::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); +} + + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} + +} // namespace simdjson + + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. + * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +template +simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + + + +namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +template <> +simdjson_inline error_code simdjson_result::get(arm64::ondemand::document_reference &out) & noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +template <> +simdjson_inline error_code simdjson_result::get(arm64::ondemand::document_reference &out) && noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator arm64::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for arm64 */ +/* including simdjson/generic/ondemand/document_stream-inl.h for arm64: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ +} + +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif +} + +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + return simdjson_result(stream->doc, stream->error); +} + +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} + +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. + return iterator(this, error == EMPTY); +} + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED +} + +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + + // Check if at end of structural indexes (i.e. at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. + */ + + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } + } +} + +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { doc.iter.consume_character(','); } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } +} + +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} + +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); + + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: We could remove trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } + } + cur_struct_index++; + } + + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } + + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} + +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } +} + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. + this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} + +} + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for arm64 */ +/* including simdjson/generic/ondemand/field-inl.h for arm64: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +// clang 6 does not think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) +{ +} + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); +} + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); +} + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; +} + +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return first; +} + + +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +} + +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); +} + +simdjson_inline value &field::value() & noexcept { + return second; +} + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); +} + +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { + if (error()) { return error(); } + return first.key_raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { + if (error()) { return error(); } + return first.escaped_key(); +} + +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(allow_replacement); +} + +template +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + +simdjson_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for arm64 */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; +} +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; +} + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; +} + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; +} + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! + } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. + logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); +} + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); +} + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; +} + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; +} + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; +} + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +} + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. + SIMDJSON_ASSUME( token.position() == _root ); +#endif +} + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); +} + +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#endif +} + +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; +} + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); +} + +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); +} + +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} + +simdjson_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; +} + +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} + +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); +} + +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); +} + +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); +} + +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); +} + +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); +} + +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); +} + +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} + +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; +} + +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; +} + +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; +} + +simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} + +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); +} + +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif +} + +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape_wobbly(in, _string_buf_loc); +#endif +} + +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; +} + +simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; +} + +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; +} + +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } +} + +#endif + + +simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; +} + + +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. + std::memset(tmpbuf + max_len, ' ', N - max_len); + } + return true; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for arm64 */ +/* including simdjson/generic/ondemand/json_type-inl.h for arm64: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); +} +#endif + + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; +} + +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; +} + +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; +} + +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} + +simdjson_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} + +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} + +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); +} + +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; +} + +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} + +simdjson_inline number::operator double() const noexcept { + return get_double(); +} + +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; +} + +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} + +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; +} + +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for arm64 */ +/* including simdjson/generic/ondemand/logger-inl.h for arm64: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace arm64 { +namespace ondemand { +namespace logger { + +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. + +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } +} + +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); +} + +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; +} + +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; +} + +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); +} + +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); +} +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} + +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} + +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} + +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); +} + +inline void log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } +} + +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); +} + +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. + printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } +} + +} // namespace logger +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for arm64 */ +/* including simdjson/generic/ondemand/object-inl.h for arm64: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} + +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); +} +simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; +} + +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); +} + +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); +} + +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; +} + +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); +} + +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); +} + +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for arm64 */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +// +// object_iterator +// + +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error + + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; +} +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for arm64 */ +/* including simdjson/generic/ondemand/parser-inl.h for arm64: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { +} + +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); +} + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { + if(json.capacity() - json.size() < SIMDJSON_PADDING) { + json.reserve(json.size() + SIMDJSON_PADDING); + } + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); +} + +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; + } + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} + +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; + } +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for arm64 */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for arm64: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +namespace arm64 { +namespace ondemand { + +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} + +simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } + + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; +} + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +} + +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters("), and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; + } +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(arm64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(arm64::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for arm64 */ +/* including simdjson/generic/ondemand/serialization-inl.h for arm64: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); +} + + +inline simdjson_result to_json_string(arm64::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(arm64::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(arm64::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. + */ + using namespace arm64::ondemand; + arm64::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + arm64::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + arm64::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } +} + +inline simdjson_result to_json_string(arm64::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(arm64::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +} // namespace simdjson + +namespace simdjson { namespace arm64 { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif +}}} // namespace simdjson::arm64::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for arm64 */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} + +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); +} + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; +} + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; +} + +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? + *(position+1) - *(position) + : *(position+2) - *(position); +} +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; +} +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); +} + +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; +} + +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; +} +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/token_iterator-inl.h for arm64 */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + end_container(); + return false; + } + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); + + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; + + // + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; + + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + + // If the loop ended, we're out of fields to look at. + return false; +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; + + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); + + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; + + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { + +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. + // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. + } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); + + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); + + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); +} + +inline std::string value_iterator::to_string() const noexcept { + auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; + } + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); + + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_string(allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} + +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} + +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; +} +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_root_string(check_trailing, allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("bool"); + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; +} + +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } else if (json[0] == 'n') { + return incorrect_type_error("Not a null but starts with n"); + } + return result; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + + return _json_iter->skip_child(depth()); +} + +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} + +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} + +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} + +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} + +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} + +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); +} + +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return peek_start(); } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! + */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } + + + return SUCCESS; +} + + +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_root(); + return _json_iter->peek(); +} +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_non_root_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; +} + +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} + +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} + +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; +} + +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); +} + +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); +} + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); +} + +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); +} + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); +} + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} + +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); +} + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; +} + +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } +} + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/value_iterator-inl.h for arm64 */ + + +/* end file simdjson/generic/ondemand/amalgamated.h for arm64 */ +/* including simdjson/arm64/end.h: #include "simdjson/arm64/end.h" */ +/* begin file simdjson/arm64/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "arm64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/arm64/end.h */ + +#endif // SIMDJSON_ARM64_ONDEMAND_H +/* end file simdjson/arm64/ondemand.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback) +/* including simdjson/fallback/ondemand.h: #include "simdjson/fallback/ondemand.h" */ +/* begin file simdjson/fallback/ondemand.h */ +#ifndef SIMDJSON_FALLBACK_ONDEMAND_H +#define SIMDJSON_FALLBACK_ONDEMAND_H + +/* including simdjson/fallback/begin.h: #include "simdjson/fallback/begin.h" */ +/* begin file simdjson/fallback/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "fallback" */ +#define SIMDJSON_IMPLEMENTATION fallback +/* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */ +/* begin file simdjson/fallback/base.h */ +#ifndef SIMDJSON_FALLBACK_BASE_H +#define SIMDJSON_FALLBACK_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Fallback implementation (runs on any machine). + */ +namespace fallback { + +class implementation; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BASE_H +/* end file simdjson/fallback/base.h */ +/* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */ +/* begin file simdjson/fallback/bitmanipulation.h */ +#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H +#define SIMDJSON_FALLBACK_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { + +#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) +static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { + unsigned long x0 = (unsigned long)x, top, bottom; + _BitScanForward(&top, (unsigned long)(x >> 32)); + _BitScanForward(&bottom, x0); + *ret = x0 ? bottom : 32 + top; + return x != 0; +} +static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { + unsigned long x1 = (unsigned long)(x >> 32), top, bottom; + _BitScanReverse(&top, x1); + _BitScanReverse(&bottom, (unsigned long)x); + *ret = x1 ? top + 32 : bottom; + return x != 0; +} +#endif + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef _MSC_VER + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// _MSC_VER +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H +/* end file simdjson/fallback/bitmanipulation.h */ +/* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */ +/* begin file simdjson/fallback/stringparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return c == '"'; } + simdjson_inline bool has_backslash() { return c == '\\'; } + simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } + simdjson_inline int backslash_index() { return c == '\\' ? 0 : 1; } + + uint8_t c; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // store to dest unconditionally - we can overwrite the bits we don't like later + dst[0] = src[0]; + return { src[0] }; +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +/* end file simdjson/fallback/stringparsing_defs.h */ +/* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */ +/* begin file simdjson/fallback/numberparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#ifdef JSON_TEST_NUMBERS // for unit testing +void found_invalid_number(const uint8_t *buf); +void found_integer(int64_t result, const uint8_t *buf); +void found_unsigned_integer(uint64_t result, const uint8_t *buf); +void found_float(double result, const uint8_t *buf); +#endif + +namespace simdjson { +namespace fallback { +namespace numberparsing { + +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) { + uint64_t val; + memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + return parse_eight_digits_unrolled(reinterpret_cast(chars)); +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace fallback +} // namespace simdjson + +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 +#else +#define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif +#endif + +#endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +/* end file simdjson/fallback/numberparsing_defs.h */ +/* end file simdjson/fallback/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for fallback: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for fallback */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) +#error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! +#endif + +// Stuff other things depend on +/* including simdjson/generic/ondemand/base.h for fallback: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +/** + * A fast, simple, DOM-like interface that parses JSON as you use it. + * + * Designed for maximum speed and a lower memory profile. + */ +namespace ondemand { + +/** Represents the depth of a JSON value (number of nested arrays/objects). */ +using depth_t = int32_t; + +/** @copydoc simdjson::fallback::number_type */ +using number_type = simdjson::fallback::number_type; + +/** @private Position in the JSON buffer indexes */ +using token_position = const uint32_t *; + +class array; +class array_iterator; +class document; +class document_reference; +class document_stream; +class field; +class json_iterator; +enum class json_type; +struct number; +class object; +class object_iterator; +class parser; +class raw_json_string; +class token_iterator; +class value; +class value_iterator; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H +/* end file simdjson/generic/ondemand/base.h for fallback */ +/* including simdjson/generic/ondemand/deserialize.h for fallback: #include "simdjson/generic/ondemand/deserialize.h" */ +/* begin file simdjson/generic/ondemand/deserialize.h for fallback */ +#if SIMDJSON_SUPPORTS_DESERIALIZATION + +#ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_ONDEMAND_DESERIALIZE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +namespace simdjson { + +namespace tag_invoke_fn_ns { +void tag_invoke(); + +struct tag_invoke_fn { + template + requires requires(Tag tag, Args &&...args) { + tag_invoke(std::forward(tag), std::forward(args)...); + } + constexpr auto operator()(Tag tag, Args &&...args) const + noexcept(noexcept(tag_invoke(std::forward(tag), + std::forward(args)...))) + -> decltype(tag_invoke(std::forward(tag), + std::forward(args)...)) { + return tag_invoke(std::forward(tag), std::forward(args)...); + } +}; +} // namespace tag_invoke_fn_ns + +inline namespace tag_invoke_ns { +inline constexpr tag_invoke_fn_ns::tag_invoke_fn tag_invoke = {}; +} // namespace tag_invoke_ns + +template +concept tag_invocable = requires(Tag tag, Args... args) { + tag_invoke(std::forward(tag), std::forward(args)...); +}; + +template +concept nothrow_tag_invocable = + tag_invocable && requires(Tag tag, Args... args) { + { + tag_invoke(std::forward(tag), std::forward(args)...) + } noexcept; + }; + +template +using tag_invoke_result = + std::invoke_result; + +template +using tag_invoke_result_t = + std::invoke_result_t; + +template using tag_t = std::decay_t; + + +struct deserialize_tag; + +/// These types are deserializable in a built-in way +template struct is_builtin_deserializable : std::false_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; + +template +concept is_builtin_deserializable_v = is_builtin_deserializable::value; + +template +concept custom_deserializable = tag_invocable; + +template +concept deserializable = custom_deserializable || is_builtin_deserializable_v; + +template +concept nothrow_custom_deserializable = nothrow_tag_invocable; + +// built-in types are noexcept and if an error happens, the value simply gets ignored and the error is returned. +template +concept nothrow_deserializable = nothrow_custom_deserializable || is_builtin_deserializable_v; + +/// Deserialize Tag +inline constexpr struct deserialize_tag { + using value_type = fallback::ondemand::value; + using document_type = fallback::ondemand::document; + using document_reference_type = fallback::ondemand::document_reference; + + // Customization Point for value + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for document + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for document reference + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + +} deserialize{}; + +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_DESERIALIZE_H +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + +/* end file simdjson/generic/ondemand/deserialize.h for fallback */ +/* including simdjson/generic/ondemand/value_iterator.h for fallback: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * Iterates through a single JSON value at a particular depth. + * + * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects + * the caller to call the right ones. + * + * @private This is not intended for external use. + */ +class value_iterator { +protected: + /** The underlying JSON iterator */ + json_iterator *_json_iter{}; + /** The depth of this value */ + depth_t _depth{}; + /** + * The starting token index for this value + */ + token_position _start_position{}; + +public: + simdjson_inline value_iterator() noexcept = default; + + /** + * Denote that we're starting a document. + */ + simdjson_inline void start_document() noexcept; + + /** + * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. + * + * Optimized for scalars. + */ + simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is at the start of the value + */ + simdjson_inline bool at_start() const noexcept; + + /** + * Tell whether the value is open--if the value has not been used, or the array/object is still open. + */ + simdjson_inline bool is_open() const noexcept; + + /** + * Tell whether the value is at an object's first field (just after the {). + */ + simdjson_inline bool at_first_field() const noexcept; + + /** + * Abandon all iteration. + */ + simdjson_inline void abandon() noexcept; + + /** + * Get the child value as a value_iterator. + */ + simdjson_inline value_iterator child_value() const noexcept; + + /** + * Get the depth of this value. + */ + simdjson_inline int32_t depth() const noexcept; + + /** + * Get the JSON type of this value. + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() const noexcept; + + /** + * @addtogroup object Object iteration + * + * Methods to iterate and find object fields. These methods generally *assume* the value is + * actually an object; the caller is responsible for keeping track of that fact. + * + * @{ + */ + + /** + * Start an object iteration. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + */ + simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; + /** + * Start an object iteration from the root. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; + /** + * Checks whether an object could be started from the root. May be called by start_root_object. + * + * @returns SUCCESS if it is possible to safely start an object from the root (document level). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; + /** + * Start an object iteration after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. + */ + simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; + + /** + * Moves to the next field in an object. + * + * Looks for , and }. If } is found, the object is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return whether there is another field in the object. + * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; + + /** + * Get the current field's key. + */ + simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; + + /** + * Pass the : in the field and move to its value. + */ + simdjson_warn_unused simdjson_inline error_code field_value() noexcept; + + /** + * Find the next field with the given key. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; + + /** + * Find the next field with the given key, *without* unescaping. This assumes object order: it + * will not find the field if it was already passed when looking for some *other* field. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + + /** + * Find the field with the given key without regard to order, and *without* unescaping. + * + * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + + /** @} */ + + /** + * @addtogroup array Array iteration + * Methods to iterate over array elements. These methods generally *assume* the value is actually + * an object; the caller is responsible for keeping track of that fact. + * @{ + */ + + /** + * Check for an opening [ and start an array iteration. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + */ + simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; + /** + * Check for an opening [ and start an array iteration while at the root. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; + /** + * Checks whether an array could be started from the root. May be called by start_root_array. + * + * @returns SUCCESS if it is possible to safely start an array from the root (document level). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; + /** + * Start an array iteration, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. + */ + simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; + + /** + * Moves to the next element in an array. + * + * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return Whether there is another element in the array. + * @error TAPE_ERROR If there is a comma missing between elements. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; + + /** + * Get a child value iterator. + */ + simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; + + /** @} */ + + /** + * @defgroup scalar Scalar values + * @addtogroup scalar + * @{ + */ + + simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; + simdjson_warn_unused simdjson_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; + + simdjson_inline error_code error() const noexcept; + simdjson_inline uint8_t *&string_buf_loc() noexcept; + simdjson_inline const json_iterator &json_iter() const noexcept; + simdjson_inline json_iterator &json_iter() noexcept; + + simdjson_inline void assert_is_valid() const noexcept; + simdjson_inline bool is_valid() const noexcept; + + /** @} */ +protected: + /** + * Restarts an array iteration. + * @returns Whether the array has any elements (returns false for empty). + */ + simdjson_inline simdjson_result reset_array() noexcept; + /** + * Restarts an object iteration. + * @returns Whether the object has any fields (returns false for empty). + */ + simdjson_inline simdjson_result reset_object() noexcept; + /** + * move_at_start(): moves us so that we are pointing at the beginning of + * the container. It updates the index so that at_start() is true and it + * syncs the depth. The user can then create a new container instance. + * + * Usage: used with value::count_elements(). + **/ + simdjson_inline void move_at_start() noexcept; + + /** + * move_at_container_start(): moves us so that we are pointing at the beginning of + * the container so that assert_at_container_start() passes. + * + * Usage: used with reset_array() and reset_object(). + **/ + simdjson_inline void move_at_container_start() noexcept; + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + + simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; + simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + simdjson_inline const uint8_t *peek_start() const noexcept; + simdjson_inline uint32_t peek_start_length() const noexcept; + simdjson_inline uint32_t peek_root_length() const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... + * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean. + * + * If, instead, you just stand your ground until it is content that you know, then + * you will only even move beyond the '[' if the user tells you that you have an + * array. So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone. + */ + + simdjson_inline void advance_scalar(const char *type) noexcept; + simdjson_inline void advance_root_scalar(const char *type) noexcept; + simdjson_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * + * @return The current token (the one left behind). + * @error TAPE_ERROR If the document ended early. + */ + simdjson_inline simdjson_result advance_to_value() noexcept; + + simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + + simdjson_inline bool is_at_start() const noexcept; + /** + * is_at_iterator_start() returns true on an array or object after it has just been + * created, whether the instance is empty or not. + * + * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) + */ + simdjson_inline bool is_at_iterator_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. + */ + simdjson_inline bool is_at_key() const noexcept; + + inline void assert_at_start() const noexcept; + inline void assert_at_container_start() const noexcept; + inline void assert_at_root() const noexcept; + inline void assert_at_child() const noexcept; + inline void assert_at_next() const noexcept; + inline void assert_at_non_root_start() const noexcept; + + /** Get the starting position of this value */ + simdjson_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + friend class document; + friend class object; + friend class array; + friend class value; + friend class field; +}; // value_iterator + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H +/* end file simdjson/generic/ondemand/value_iterator.h for fallback */ +/* including simdjson/generic/ondemand/value.h for fallback: #include "simdjson/generic/ondemand/value.h" */ +/* begin file simdjson/generic/ondemand/value.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { + +namespace fallback { +namespace ondemand { +/** + * An ephemeral JSON value returned during iteration. It is only valid for as long as you do + * not access more data in the JSON document. + */ +class value { +public: + /** + * Create a new invalid value. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline value() noexcept = default; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + #if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif + } + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() noexcept; + + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() noexcept; + + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + + /** + * Cast this JSON value (inside string) to a unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * In some instances, you may want to allow replacement of invalid Unicode sequences. + * You may do so by passing the allow_replacement parameter as true. In the following + * example, the string "431924697b\udff0L\u0001Y" is not valid Unicode. By passing true + * to get_string, we allow the replacement of the invalid Unicode sequences with the Unicode + * replacement character (U+FFFD). + * + * simdjson::ondemand::parser parser; + * auto json = R"({"deviceId":"431924697b\udff0L\u0001Y"})"_padded; + * simdjson::ondemand::document doc = parser.iterate(json); + * auto view = doc["deviceId"].get_string(true); + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. + * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + + /** + * Cast this JSON value to a "wobbly" string. + * + * The string is may not be a valid UTF-8 string. + * See https://simonsapin.github.io/wtf-8/ + * + * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.). + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); +#endif + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + * + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * Performance hint: You should only call count_elements() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method on the object instance. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field as not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_result operator[](int) noexcept = delete; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @return The type of JSON value (json_type::array, json_type::object, json_type::string, + * json_type::number, json_type::boolean, or json_type::null). + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the value is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the value is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the value is a negative number. + * + * @returns true if the number if negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the value is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * Performance note: if you call this function systematically + * before parsing a number, you may have fallen for a performance + * anti-pattern. + * + * @returns true if the number if negative. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808. + * get_number_type() is number_type::big_integer for integers that do not fit in 64 bits, + * in which case the digit_count is set to the length of the big integer string. + * Otherwise, get_number_type() has value number_type::floating_point_number. + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * For integers that do not fit in 64 bits, the function returns BIGINT_ERROR error code. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + * + * Performance note: this is designed with performance in mind. When + * calling 'get_number()', you scan the number string only once, determining + * efficiently the type and storing it in an efficient manner. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. However, if this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view is guaranteed to be + * a non-space token. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + * + * See also value::raw_json(). + */ + simdjson_inline std::string_view raw_json_token() noexcept; + + /** + * Get a string_view pointing at this value in the JSON document. + * If this element is an array or an object, it consumes the array or the object + * and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + * If this element is a scalar (string, number, Boolean, null), it returns what + * raw_json_token() would return. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + simdjson_inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. + * + * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not + * standardized (by RFC 6901). We provide some experimental support for JSON pointers + * on non-document instances. Yet it is not the case when calling at_pointer on an array + * or an object instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + + +protected: + /** + * Create a value. + */ + simdjson_inline value(const value_iterator &iter) noexcept; + + /** + * Skip this value, allowing iteration to continue. + */ + simdjson_inline void skip() noexcept; + + /** + * Start a value at the current position. + * + * (It should already be started; this is just a self-documentation method.) + */ + static simdjson_inline value start(const value_iterator &iter) noexcept; + + /** + * Resume a value. + */ + static simdjson_inline value resume(const value_iterator &iter) noexcept; + + /** + * Get the object, starting or resuming it as necessary + */ + simdjson_inline simdjson_result start_or_resume_object() noexcept; + + // simdjson_inline void log_value(const char *type) const noexcept; + // simdjson_inline void log_error(const char *message) const noexcept; + + value_iterator iter{}; + + friend class document; + friend class array_iterator; + friend class field; + friend class object; + friend struct simdjson_result; + friend struct simdjson_result; + friend class field; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; + + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() noexcept; + + template simdjson_inline error_code get(T &out) noexcept; + +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator fallback::ondemand::array() noexcept(false); + simdjson_inline operator fallback::ondemand::object() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator fallback::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field as not there when they are not in order). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_result operator[](int) noexcept = delete; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + */ + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + + /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + + /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ + simdjson_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H +/* end file simdjson/generic/ondemand/value.h for fallback */ +/* including simdjson/generic/ondemand/logger.h for fallback: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +// Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical +// that the call to the log functions be side-effect free. Thus, for example, you should not +// create temporary std::string instances. +namespace logger { + +enum class log_level : int32_t { + info = 0, + error = 1 +}; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + +// We do not want these functions to be 'really inlined' since real inlining is +// for performance purposes and if you are using the loggers, you do not care about +// performance (or should not). +static inline void log_headers() noexcept; +// If args are provided, title will be treated as format string +template +static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +template +static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; +static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; + +static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; +static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +} // namespace logger +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H +/* end file simdjson/generic/ondemand/logger.h for fallback */ +/* including simdjson/generic/ondemand/token_iterator.h for fallback: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) + * detected by stage 1. + * + * @private This is not intended for external use. + */ +class token_iterator { +public: + /** + * Create a new invalid token_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline token_iterator() noexcept = default; + simdjson_inline token_iterator(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator(const token_iterator &other) noexcept = default; + simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + + /** + * Advance to the next token (returning the current one). + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Reports the current offset in bytes from the start of the underlying buffer. + */ + simdjson_inline uint32_t current_offset() const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token (start of the document). + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Return the current index. + */ + simdjson_inline token_position position() const noexcept; + /** + * Reset to a previously saved index. + */ + simdjson_inline void set_position(token_position target_position) noexcept; + + // NOTE: we don't support a full C++ iterator interface, because we expect people to make + // different calls to advance the iterator based on *their own* state. + + simdjson_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_inline bool operator<=(const token_iterator &other) const noexcept; + +protected: + simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; + + /** + * Get the index of the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; + /** + * Get the index of the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline uint32_t peek_index(token_position position) const noexcept; + + const uint8_t *buf{}; + token_position _position{}; + + friend class json_iterator; + friend class value_iterator; + friend class object; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H +/* end file simdjson/generic/ondemand/token_iterator.h for fallback */ +/* including simdjson/generic/ondemand/json_iterator.h for fallback: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * Iterates through JSON tokens, keeping track of depth and string buffer. + * + * @private This is not intended for external use. + */ +class json_iterator { +protected: + token_iterator token{}; + ondemand::parser *parser{}; + /** + * Next free location in the string buffer. + * + * Used by raw_json_string::unescape() to have a place to unescape strings to. + */ + uint8_t *_string_buf_loc{}; + /** + * JSON error, if there is one. + * + * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. + */ + error_code error{SUCCESS}; + /** + * Depth of the current token in the JSON. + * + * - 0 = finished with document + * - 1 = document root value (could be [ or {, not yet known) + * - 2 = , or } inside root array/object + * - 3 = key or value inside root array/object. + */ + depth_t _depth{}; + /** + * Beginning of the document indexes. + * Normally we have root == parser->implementation->structural_indexes.get() + * but this may differ, especially in streaming mode (where we have several + * documents); + */ + token_position _root{}; + /** + * Normally, a json_iterator operates over a single document, but in + * some cases, we may have a stream of documents. This attribute is meant + * as meta-data: the json_iterator works the same irrespective of the + * value of this attribute. + */ + bool _streaming{false}; + +public: + simdjson_inline json_iterator() noexcept = default; + simdjson_inline json_iterator(json_iterator &&other) noexcept; + simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; + simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; + /** + * Skips a JSON value, whether it is a scalar, array or object. + */ + simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; + + /** + * Tell whether the iterator is still at the start + */ + simdjson_inline bool at_root() const noexcept; + + /** + * Tell whether we should be expected to run in streaming + * mode (iterating over many documents). It is pure metadata + * that does not affect how the iterator works. It is used by + * start_root_array() and start_root_object(). + */ + simdjson_inline bool streaming() const noexcept; + + /** + * Get the root value iterator + */ + simdjson_inline token_position root_position() const noexcept; + /** + * Assert that we are at the document depth (== 1) + */ + simdjson_inline void assert_at_document_depth() const noexcept; + /** + * Assert that we are at the root of the document + */ + simdjson_inline void assert_at_root() const noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is live (has not been moved). + */ + simdjson_inline bool is_alive() const noexcept; + + /** + * Abandon this iterator, setting depth to 0 (as if the document is finished). + */ + simdjson_inline void abandon() noexcept; + + /** + * Advance the current token without modifying depth. + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + + /** + * Returns true if there is a single token in the index (i.e., it is + * a JSON with a scalar value such as a single number). + * + * @return whether there is a single token + */ + simdjson_inline bool is_single_token() const noexcept; + + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds. + */ + simdjson_inline void assert_valid_position(token_position position) const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get a pointer to the current location in the input buffer. + * + * This is not null-terminated; it is a view into the JSON. + * + * You may be pointing outside of the input buffer: it is not generally + * safe to dereference this pointer. + */ + simdjson_inline const uint8_t *unsafe_pointer() const noexcept; + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token to retrieve. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; + /** + * Get the JSON text for the last token in the document. + * + * This is not null-terminated; it is a view into the JSON. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it is not used ... + */ + simdjson_inline const uint8_t *peek_last() const noexcept; + + /** + * Ascend one level. + * + * Validates that the depth - 1 == parent_depth. + * + * @param parent_depth the expected parent depth. + */ + simdjson_inline void ascend_to(depth_t parent_depth) noexcept; + + /** + * Descend one level. + * + * Validates that the new depth == child_depth. + * + * @param child_depth the expected child depth. + */ + simdjson_inline void descend_to(depth_t child_depth) noexcept; + simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + + /** + * Get current depth. + */ + simdjson_inline depth_t depth() const noexcept; + + /** + * Get current (writeable) location in the string buffer. + */ + simdjson_inline uint8_t *&string_buf_loc() noexcept; + + /** + * Report an unrecoverable error, preventing further iteration. + * + * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + /** + * Log error, but don't stop iteration. + * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + + /** + * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with + * N bytes of capacity. It will return false if N is too small (smaller than max_len) of if it is zero. + * The buffer (tmpbuf) is padded with space characters. + */ + simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; + + simdjson_inline token_position position() const noexcept; + /** + * Write the raw_json_string to the string buffer and return a string_view. + * Each raw_json_string should be unescaped once, or else the string buffer might + * overflow. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; + + simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; + + simdjson_inline error_code consume_character(char c) noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline token_position start_position(depth_t depth) const noexcept; + simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; +#endif + + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Updates this json iterator so that it is back at the beginning of the document, + * as if it had just been created. + */ + inline void rewind() noexcept; + /** + * This checks whether the {,},[,] are balanced so that the document + * ends with proper zero depth. This requires scanning the whole document + * and it may be expensive. It is expected that it will be rarely called. + * It does not attempt to match { with } and [ with ]. + */ + inline bool balanced() const noexcept; +protected: + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /// The last token before the end + simdjson_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_inline token_position end_position() const noexcept; + /// The end of the buffer. + simdjson_inline token_position end() const noexcept; + + friend class document; + friend class document_stream; + friend class object; + friend class array; + friend class value; + friend class raw_json_string; + friend class parser; + friend class value_iterator; + friend class field; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +}; // json_iterator + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H +/* end file simdjson/generic/ondemand/json_iterator.h for fallback */ +/* including simdjson/generic/ondemand/json_type.h for fallback: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * The type of a JSON value. + */ +enum class json_type { + // Start at 1 to catch uninitialized / default values more easily + array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) + object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) + number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) + string, ///< A JSON string ( "a" or "hello world\n" ...) + boolean, ///< A JSON boolean (true or false) + null ///< A JSON null (null) +}; + +/** + * A type representing a JSON number. + * The design of the struct is deliberately straight-forward. All + * functions return standard values with no error check. + */ +struct number { + + /** + * return the automatically determined type of + * the number: number_type::floating_point_number, + * number_type::signed_integer or number_type::unsigned_integer. + * + * enum class number_type { + * floating_point_number=1, /// a binary64 number + * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + * unsigned_integer /// a positive integer larger or equal to 1<<63 + * }; + */ + simdjson_inline ondemand::number_type get_number_type() const noexcept; + /** + * return true if the automatically determined type of + * the number is number_type::unsigned_integer. + */ + simdjson_inline bool is_uint64() const noexcept; + /** + * return the value as a uint64_t, only valid if is_uint64() is true. + */ + simdjson_inline uint64_t get_uint64() const noexcept; + simdjson_inline operator uint64_t() const noexcept; + + /** + * return true if the automatically determined type of + * the number is number_type::signed_integer. + */ + simdjson_inline bool is_int64() const noexcept; + /** + * return the value as a int64_t, only valid if is_int64() is true. + */ + simdjson_inline int64_t get_int64() const noexcept; + simdjson_inline operator int64_t() const noexcept; + + + /** + * return true if the automatically determined type of + * the number is number_type::floating_point_number. + */ + simdjson_inline bool is_double() const noexcept; + /** + * return the value as a double, only valid if is_double() is true. + */ + simdjson_inline double get_double() const noexcept; + simdjson_inline operator double() const noexcept; + + /** + * Convert the number to a double. Though it always succeed, the conversion + * may be lossy if the number cannot be represented exactly. + */ + simdjson_inline double as_double() const noexcept; + + +protected: + /** + * The next block of declaration is designed so that we can call the number parsing + * functions on a number type. They are protected and should never be used outside + * of the core simdjson library. + */ + friend class value_iterator; + template + friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); + template + friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); + /** Store a signed 64-bit value to the number. */ + simdjson_inline void append_s64(int64_t value) noexcept; + /** Store an unsigned 64-bit value to the number. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + /** Store a double value to the number. */ + simdjson_inline void append_double(double value) noexcept; + /** Specifies that the value is a double, but leave it undefined. */ + simdjson_inline void skip_double() noexcept; + /** + * End of friend declarations. + */ + + /** + * Our attributes are a union type (size = 64 bits) + * followed by a type indicator. + */ + union { + double floating_point_number; + int64_t signed_integer; + uint64_t unsigned_integer; + } payload{0}; + number_type type{number_type::signed_integer}; +}; + +/** + * Write the JSON type to the output stream + * + * @param out The output stream. + * @param type The json_type. + */ +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; + +#if SIMDJSON_EXCEPTIONS +/** + * Send JSON type to an output stream. + * + * @param out The output stream. + * @param type The json_type. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). + */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); +#endif + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H +/* end file simdjson/generic/ondemand/json_type.h for fallback */ +/* including simdjson/generic/ondemand/raw_json_string.h for fallback: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * A string escaped per JSON rules, terminated with quote ("). They are used to represent + * unescaped keys inside JSON documents. + * + * (In other words, a pointer to the beginning of a string, just after the start quote, inside a + * JSON file.) + * + * This class is deliberately simplistic and has little functionality. You can + * compare a raw_json_string instance with an unescaped C string, but + * that is nearly all you can do. + * + * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own + * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser + * instance. Doing so requires you to have a sufficiently large buffer. + * + * The raw_json_string instances originate typically from field instance which in turn represent + * key-value pairs from object instances. From a field instance, you get the raw_json_string + * instance by calling key(). You can, if you want a more usable string_view instance, call + * the unescaped_key() method on the field instance. You may also create a raw_json_string from + * any other string value, with the value.get_raw_json_string() method. Again, you can get + * a more usable string_view instance by calling get_string(). + * + */ +class raw_json_string { +public: + /** + * Create a new invalid raw_json_string. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline raw_json_string() noexcept = default; + + /** + * Create a new invalid raw_json_string pointed at the given location in the JSON. + * + * The given location must be just *after* the beginning quote (") in the JSON file. + * + * It *must* be terminated by a ", and be a valid JSON string. + */ + simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; + /** + * Get the raw pointer to the beginning of the string in the JSON (just after the "). + * + * It is possible for this function to return a null pointer if the instance + * has outlived its existence. + */ + simdjson_inline const char * raw() const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done) on target.size() characters, + * and if the raw_json_string instance has a quote character at byte index target.size(). + * We never read more than length + 1 bytes in the raw_json_string instance. + * If length is smaller than target.size(), this will return false. + * + * The std::string_view instance may contain any characters. However, the caller + * is responsible for setting length so that length bytes may be read in the + * raw_json_string. + * + * Performance: the comparison may be done using memcmp which may be efficient + * for long strings. + */ + simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The std::string_view instance should not contain unescaped quote characters: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * Performance: the comparison is done byte-by-byte which might be inefficient for + * long strings. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The provided C string should not contain an unescaped quote character: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(const char* target) const noexcept; + + /** + * Returns true if target is free from unescaped quote. If target is known at + * compile-time, we might expect the computation to happen at compile time with + * many compilers (not all!). + */ + static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + +private: + + + /** + * This will set the inner pointer to zero, effectively making + * this instance unusable. + */ + simdjson_inline void consume() noexcept { buf = nullptr; } + + /** + * Checks whether the inner pointer is non-null and thus usable. + */ + simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result will be a valid UTF-8. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result may not be a valid UTF-8. https://simonsapin.github.io/wtf-8/ + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; + const uint8_t * buf{}; + friend class object; + friend class field; + friend class parser; + friend struct simdjson_result; +}; + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; + +/** + * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible + * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. + */ +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; + + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private + + simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(fallback::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(fallback::ondemand::json_iterator &iter) const noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H +/* end file simdjson/generic/ondemand/raw_json_string.h for fallback */ +/* including simdjson/generic/ondemand/parser.h for fallback: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value + * in the future. + */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * A JSON fragment iterator. + * + * This holds the actual iterator as well as the buffer for writing strings. + */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + */ + inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + + inline parser(parser &&other) noexcept = default; + simdjson_inline parser(const parser &other) = delete; + simdjson_inline parser &operator=(const parser &other) = delete; + simdjson_inline parser &operator=(parser &&other) noexcept = default; + + /** Deallocate the JSON parser. */ + inline ~parser() noexcept = default; + + /** + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * document doc = parser.iterate(json); + * + * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. + * Otherwise the iterate method may return an error. In particular, the whole input should be + * valid: we do not attempt to tolerate incorrect content either before or after a JSON + * document. If there is a UTF-8 BOM, the parser skips it. + * + * ### IMPORTANT: Validate what you use + * + * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to + * iterate does not parse and validate the whole document. + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * ### std::string references + * + * If you pass a mutable std::string reference (std::string&), the parser will seek to extend + * its capacity to SIMDJSON_PADDING bytes beyond the end of the string. + * + * Whenever you pass an std::string reference, the parser will access the bytes beyond the end of + * the string but before the end of the allocated memory (std::string::capacity()). + * If you are using a sanitizer that checks for reading uninitialized bytes or std::string's + * container-overflow checks, you may encounter sanitizer warnings. + * You can safely ignore these warnings. Or you can call simdjson::pad(std::string&) to pad the + * string with SIMDJSON_PADDING spaces: this function returns a simdjson::padding_string_view + * which can be be passed to the parser's iterate function: + * + * std::string json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"; + * document doc = parser.iterate(simdjson::pad(json)); + * + * @param json The JSON to parse. + * @param len The length of the JSON. + * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). + * + * @return The document, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = delete; + + /** + * @private + * + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * json_iterator doc = parser.iterate(json); + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * The ondemand::document instance holds the iterator. The document must remain in scope + * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * + * @return The iterator, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; + + + /** + * Parse a buffer containing many JSON documents. + * + * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; + * ondemand::parser parser; + * ondemand::document_stream docs = parser.iterate_many(json); + * for (auto & doc : docs) { + * std::cout << doc["foo"] << std::endl; + * } + * // Prints 1 2 3 + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * + * The caller is responsabile to ensure that the input string data remains unchanged and is + * not deleted during the loop. + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with ASCII whitespace. + * + * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). + * If there is a UTF-8 BOM, the parser skips it. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may impact negatively the + * performance. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to 10MB, which has been a reasonable sweet spot in our tests. + * @param allow_comma_separated (defaults on false) This allows a mode where the documents are + * separated by commas instead of whitespace. It comes with a performance + * penalty because the entire document is indexed at once (and the document must be + * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter + * is effectively ignored, as it is set to at least the document size. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + + /** The capacity of this parser (the largest document it can process). */ + simdjson_pure simdjson_inline size_t capacity() const noexcept; + /** The maximum capacity of this parser (the largest document it is allowed to process). */ + simdjson_pure simdjson_inline size_t max_capacity() const noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + /** + * The maximum depth of this parser (the most deeply nested objects and arrays it can process). + * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + */ + simdjson_pure simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + + #ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; + #else + /** + * When SIMDJSON_THREADS_ENABLED is not defined, the parser instance cannot use threads. + */ + bool threaded{false}; + #endif + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result must be valid UTF-8. + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; + +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * the string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and usafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + +private: + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + size_t _capacity{0}; + size_t _max_capacity; + size_t _max_depth{DEFAULT_MAX_DEPTH}; + std::unique_ptr string_buf{}; +#if SIMDJSON_DEVELOPMENT_CHECKS + std::unique_ptr start_positions{}; +#endif + + friend class json_iterator; + friend class document_stream; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H +/* end file simdjson/generic/ondemand/parser.h for fallback */ + +// All other declarations +/* including simdjson/generic/ondemand/array.h for fallback: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * A forward-only JSON array. + */ +class array { +public: + /** + * Create a new invalid array. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline array() noexcept = default; + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. Note that count_elements() does not validate the JSON values, + * only the structure of the array. + * + * To check that an array is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the beginning of the array and checks whether the + * array is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result is_empty() & noexcept; + /** + * Reset the iterator so that we are pointing back at the + * beginning of the array. You should still consume values only once even if you + * can iterate through the array more than once. If you unescape a string + * within the array more than once, you have unsafe code. Note that rewinding + * an array means that you may need to reparse it anew: it is not a free + * operation. + * + * @returns true if the array contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/0/foo/a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an array + * instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; +protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + + /** + * Begin array iteration. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + */ + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + /** + * Begin array iteration from the root. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + * @error TAPE_ERROR if there is no closing ] at the end of the document. + */ + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + /** + * Begin array iteration. + * + * This version of the method should be called after the initial [ has been verified, and is + * intended for use by switch statements that check the type of a value. + * + * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. + */ + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + + /** + * Create an array at the given Internal array creation. Call array::start() or array::started() instead of this. + * + * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() + * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* + * into the resulting array. + */ + simdjson_inline array(const value_iterator &iter) noexcept; + + /** + * Iterator marking current position. + * + * iter.is_alive() == false indicates iteration is complete. + */ + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; + friend struct simdjson_result; + friend class array_iterator; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + inline simdjson_result count_elements() & noexcept; + inline simdjson_result is_empty() & noexcept; + inline simdjson_result reset() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H +/* end file simdjson/generic/ondemand/array.h for fallback */ +/* including simdjson/generic/ondemand/array_iterator.h for fallback: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * A forward-only JSON array. + * + * This is an input_iterator, meaning: + * - It is forward-only + * - * must be called exactly once per element. + * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) + */ +class array_iterator { +public: + /** Create a new, invalid array iterator. */ + simdjson_inline array_iterator() noexcept = default; + + // + // Iterator interface + // + + /** + * Get the current element. + * + * Part of the std::iterator interface. + */ + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + /** + * Check if we are at the end of the JSON. + * + * Part of the std::iterator interface. + * + * @return true if there are no more elements in the JSON array. + */ + simdjson_inline bool operator==(const array_iterator &) const noexcept; + /** + * Check if there are more elements in the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are more elements in the JSON array. + */ + simdjson_inline bool operator!=(const array_iterator &) const noexcept; + /** + * Move to the next element. + * + * Part of the std::iterator interface. + */ + simdjson_inline array_iterator &operator++() noexcept; + +private: + value_iterator iter{}; + + simdjson_inline array_iterator(const value_iterator &iter) noexcept; + + friend class array; + friend class value; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H +/* end file simdjson/generic/ondemand/array_iterator.h for fallback */ +/* including simdjson/generic/ondemand/document.h for fallback: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * A JSON document. It holds a json_iterator instance. + * + * Used by tokens to get text, and string buffer location. + * + * You must keep the document around during iteration. + */ +class document { +public: + /** + * Create a new invalid document. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline document() noexcept = default; + simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_inline document(document &&other) noexcept = default; + simdjson_inline document &operator=(const document &other) noexcept = delete; + simdjson_inline document &operator=(document &&other) noexcept = default; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() & noexcept; + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() & noexcept; + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: Calling get_string() twice on the same document is an error. + * + * @param Whether to allow a replacement character for unmatched surrogate pairs. + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + /** + * Attempts to fill the provided std::string reference with the parsed value of the current string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * Performance: This method may be slower than get_string() or get_string(bool) because it may need to allocate memory. + * We recommend you avoid allocating an std::string unless you need to. + * + * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. + */ + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + /** + * Cast this JSON value to a string. + * + * The string is not guaranteed to be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * + * Important: Calling get_wobbly_string() twice on the same document is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is set to 1 (which is the case when building in Debug mode + * by default), and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value if a JSON array or object cannot be found. + * @returns SCALAR_DOCUMENT_AS_VALUE error is the document is a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result get_value() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "Cannot initialize the specified type."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + /** + * @overload template simdjson_result get() & noexcept + * + * We disallow the use tag_invoke CPO on a moved document; it may create UB + * if user uses `ondemand::array` or `ondemand::object` in their custom type. + * + * The member function is still remains specialize-able for compatibility + * reasons, but we completely disallow its use when a tag_invoke customization + * is provided. + */ + template + simdjson_inline simdjson_result get() && +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(!std::is_same::value && !std::is_same::value, "You should never hold either an ondemand::array or ondemand::object without a corresponding ondemand::document being alive; that would be Undefined Behaviour."); + return static_cast(*this).get(); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { +#if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + } + + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.) + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() & noexcept(false); + template + explicit simdjson_deprecated simdjson_inline operator T() && noexcept(false); + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() & noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() & noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * You must not have begun iterating through the object or array. When + * SIMDJSON_DEVELOPMENT_CHECKS is defined, and you have already begun iterating, + * you will get an OUT_OF_ORDER_ITERATION error. If you have begun iterating, you can use + * rewind() to reset the document to its initial state before calling this method. + * + * @returns A value value if a JSON array or object cannot be found. + * @exception SCALAR_DOCUMENT_AS_VALUE error is the document is a scalar (see is_scalar() function). + */ + simdjson_inline operator value() noexcept(false); +#endif + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. Note that count_elements() does not validate the JSON values, + * only the structure of the array. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) & noexcept; + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to + * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the document is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the document is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + + /** + * Checks whether the document is a negative number. + * + * @returns true if the number if negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the document is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * @returns true if the number if negative. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 and no larger than 18446744073709551615. + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 and greater or equal to -9223372036854775808. + * get_number_type() is number_type::big_integer if we have an integer outside + * of those ranges (either larger than 18446744073709551615 or smaller than -9223372036854775808). + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. If this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view may be the padded buffer. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** + * Reset the iterator inside the document instance so we are pointing back at the + * beginning of the document, as if it had just been created. It invalidates all + * values, objects and arrays that you have created so far (including unescaped strings). + */ + inline void rewind() noexcept; + /** + * Returns debugging information. + */ + inline std::string to_debug_string() noexcept; + /** + * Some unrecoverable error conditions may render the document instance unusable. + * The is_alive() method returns true when the document is still suitable. + */ + inline bool is_alive() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Returns true if this document has been fully parsed. + * If you have consumed the whole document and at_end() returns + * false, then there may be trailing content. + */ + inline bool at_end() const noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/\\u00E9") == 123 + * doc.at_pointer((const char*)u8"/\u00E9") returns an error (NO_SUCH_FIELD) + * + * Note that at_pointer() automatically calls rewind between each call. Thus + * all values, objects and arrays that you have created so far (including unescaped strings) + * are invalidated. After calling at_pointer, you need to consume the result: string values + * should be stored in your own variables, arrays should be decoded and stored in your own array-like + * structures and so forth. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_path(".\\u00E9") == 123 + * doc.at_path((const char*)u8".\u00E9") returns an error (NO_SUCH_FIELD) + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Consumes the document and returns a string_view instance corresponding to the + * document as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; +protected: + /** + * Consumes the document. + */ + simdjson_inline error_code consume() noexcept; + + simdjson_inline document(ondemand::json_iterator &&iter) noexcept; + simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; + + simdjson_inline value_iterator resume_value_iterator() noexcept; + simdjson_inline value_iterator get_root_value_iterator() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; + + // + // Fields + // + json_iterator iter{}; ///< Current position in the document + static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 + + friend class array_iterator; + friend class value; + friend class ondemand::parser; + friend class object; + friend class array; + friend class field; + friend class token; + friend class document_stream; + friend class document_reference; +}; + + +/** + * A document_reference is a thin wrapper around a document reference instance. + * The document_reference instances are used primarily/solely for streams of JSON + * documents. They differ from document instances when parsing a scalar value + * (a document that is not an array or an object). In the case of a document, + * we expect the document to be fully consumed. In the case of a document_reference, + * we allow trailing content. + */ +class document_reference { +public: + simdjson_inline document_reference() noexcept; + simdjson_inline document_reference(document &d) noexcept; + simdjson_inline document_reference(const document_reference &other) noexcept = default; + simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; + simdjson_inline void rewind() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + + simdjson_inline simdjson_result is_null() noexcept; + template + simdjson_inline simdjson_result get() & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "Cannot initialize the specified type."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + template + simdjson_inline simdjson_result get() && +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(!std::is_same::value && !std::is_same::value, "You should never hold either an ondemand::array or ondemand::object without a corresponding ondemand::document_reference being alive; that would be Undefined Behaviour."); + return static_cast(*this).get(); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { +#if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + } + + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + simdjson_inline operator document&() const noexcept; +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator array() & noexcept(false); + simdjson_inline operator object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + +private: + document *doc{nullptr}; +}; +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; +#if SIMDJSON_EXCEPTIONS + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator fallback::ondemand::array() & noexcept(false); + simdjson_inline operator fallback::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator fallback::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator fallback::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool at_end() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + + + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; +#if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator fallback::ondemand::array() & noexcept(false); + simdjson_inline operator fallback::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator fallback::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator fallback::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; +}; + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H +/* end file simdjson/generic/ondemand/document.h for fallback */ +/* including simdjson/generic/ondemand/document_stream.h for fallback: #include "simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace fallback { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + ondemand::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; + + friend class document_stream; +}; +#endif // SIMDJSON_THREADS_ENABLED + +/** + * A forward-only stream of documents. + * + * Produced by parser::iterate_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * auto error = parser.iterate_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + + /** + * Returns the input size in bytes. + */ + inline size_t size_in_bytes() const noexcept; + + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto & doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + + class iterator { + public: + using value_type = simdjson_result; + using reference = simdjson_result; + using pointer = void; + using difference_type = std::ptrdiff_t; + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline reference operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + simdjson_inline size_t current_index() const noexcept; + + /** + * @private + * + * Gives a view of the current document at the current position. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + * + */ + simdjson_inline std::string_view source() const noexcept; + + /** + * Returns error of the stream (if any). + */ + inline error_code error() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + + friend class document; + friend class document_stream; + friend class json_iterator; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. + * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_inline document_stream( + ondemand::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size, + bool allow_comma_separated + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the parser skips it. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** Move the json_iterator of the document to the location of the next document in the stream. */ + inline void next_document() noexcept; + + /** Get the next document index. */ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; + + // Fields + ondemand::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + bool allow_comma_separated; + /** + * We are going to use just one document instance. The document owns + * the json_iterator. It implies that we only ever pass a reference + * to the document to the users. + */ + document doc{}; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; + + #ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. + */ + ondemand::parser stage1_thread_parser{}; + + friend struct stage1_worker; + #endif // SIMDJSON_THREADS_ENABLED + + friend class parser; + friend class document; + friend class json_iterator; + friend struct simdjson_result; + friend struct simdjson::internal::simdjson_result_base; +}; // document_stream + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H +/* end file simdjson/generic/ondemand/document_stream.h for fallback */ +/* including simdjson/generic/ondemand/field.h for fallback: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * A JSON field (key/value pair) in an object. + * + * Returned from object iteration. + * + * Extends from std::pair so you can use C++ algorithms that rely on pairs. + */ +class field : public std::pair { +public: + /** + * Create a new invalid field. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline field() noexcept; + + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; + /** + * Get the key as a raw_json_string. Can be used for direct comparison with + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. + */ + simdjson_inline raw_json_string key() const noexcept; + /** + * Get the unprocessed key as a string_view. This includes the quotes and may include + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). + */ + simdjson_inline std::string_view key_raw_json_token() const noexcept; + /** + * Get the key as a string_view. This does not include the quotes and + * the string is unprocessed key so it may contain escape characters + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. + */ + simdjson_inline std::string_view escaped_key() const noexcept; + /** + * Get the field value. + */ + simdjson_inline ondemand::value &value() & noexcept; + /** + * @overload ondemand::value &ondemand::value() & noexcept + */ + simdjson_inline ondemand::value value() && noexcept; + +protected: + simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; + static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; + static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; + friend struct simdjson_result; + friend class object_iterator; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result key_raw_json_token() noexcept; + simdjson_inline simdjson_result escaped_key() noexcept; + simdjson_inline simdjson_result value() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H +/* end file simdjson/generic/ondemand/field.h for fallback */ +/* including simdjson/generic/ondemand/object.h for fallback: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * A forward-only JSON object field iterator. + */ +class object { +public: + /** + * Create a new invalid object. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a + * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field was not there when they are not in order). + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. + * + * If you expect to have keys with escape characters, please review our documentation. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an object + * instance: there is no rewind and no invalidation. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + + /** + * Reset the iterator so that we are pointing back at the + * beginning of the object. You should still consume values only once even if you + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object + * means that you may need to reparse it anew: it is not a free operation. + * + * @returns true if the object contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * This method scans the beginning of the object and checks whether the + * object is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + inline simdjson_result is_empty() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + +protected: + /** + * Go to the end of the object, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_inline object resume(const value_iterator &iter) noexcept; + simdjson_inline object(const value_iterator &iter) noexcept; + + simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; + + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + + inline simdjson_result reset() noexcept; + inline simdjson_result is_empty() noexcept; + inline simdjson_result count_fields() & noexcept; + inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H +/* end file simdjson/generic/ondemand/object.h for fallback */ +/* including simdjson/generic/ondemand/object_iterator.h for fallback: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +class object_iterator { +public: + /** + * Create a new invalid object_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object_iterator() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const object_iterator &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_inline bool operator!=(const object_iterator &) const noexcept; + // Checks for ']' and ',' + simdjson_inline object_iterator &operator++() noexcept; + +private: + /** + * The underlying JSON iterator. + * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object + * is first used, and never changes afterwards. + */ + value_iterator iter{}; + + simdjson_inline object_iterator(const value_iterator &iter) noexcept; + friend struct simdjson_result; + friend class object; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + // Checks for ']' and ',' + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H +/* end file simdjson/generic/ondemand/object_iterator.h for fallback */ +/* including simdjson/generic/ondemand/serialization.h for fallback: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(fallback::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. It does not + * validate the content. + */ +inline simdjson_result to_json_string(fallback::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(fallback::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(fallback::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +} // namespace simdjson + +/** + * We want to support argument-dependent lookup (ADL). + * Hence we should define operator<< in the namespace + * where the argument (here value, object, etc.) resides. + * Credit: @madhur4127 + * See https://github.com/simdjson/simdjson/issues/1768 + */ +namespace simdjson { namespace fallback { namespace ondemand { + +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::value x); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +}}} // namespace simdjson::fallback::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H +/* end file simdjson/generic/ondemand/serialization.h for fallback */ + +// Deserialization for standard types +/* including simdjson/generic/ondemand/std_deserialize.h for fallback: #include "simdjson/generic/ondemand/std_deserialize.h" */ +/* begin file simdjson/generic/ondemand/std_deserialize.h for fallback */ +#if SIMDJSON_SUPPORTS_DESERIALIZATION + +#ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_ONDEMAND_DESERIALIZE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +template +constexpr bool require_custom_serialization = false; + +////////////////////////////// +// Number deserialization +////////////////////////////// + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + using limits = std::numeric_limits; + + uint64_t x; + SIMDJSON_TRY(val.get_uint64().get(x)); + if (x > (limits::max)()) { + return NUMBER_OUT_OF_RANGE; + } + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + double x; + SIMDJSON_TRY(val.get_double().get(x)); + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + using limits = std::numeric_limits; + + int64_t x; + SIMDJSON_TRY(val.get_int64().get(x)); + if (x > (limits::max)() || x < (limits::min)()) { + return NUMBER_OUT_OF_RANGE; + } + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + +/** + * STL containers have several constructors including one that takes a single + * size argument. Thus, some compilers (Visual Studio) will not be able to + * disambiguate between the size and container constructor. Users should + * explicitly specify the type of the container as needed: e.g., + * doc.get>(). + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::value_type; + static_assert( + deserializable, + "The specified type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the container must default constructible."); + + fallback::ondemand::array arr; + SIMDJSON_TRY(val.get_array().get(arr)); + for (auto v : arr) { + if constexpr (concepts::returns_reference) { + if (auto const err = v.get().get(concepts::emplace_one(out)); + err) { + // If an error occurs, the empty element that we just inserted gets + // removed. We're not using a temp variable because if T is a heavy + // type, we want the valid path to be the fast path and the slow path be + // the path that has errors in it. + if constexpr (requires { out.pop_back(); }) { + static_cast(out.pop_back()); + } + return err; + } + } else { + value_type temp; + if (auto const err = v.get().get(temp); err) { + return err; + } + concepts::emplace_one(out, std::move(temp)); + } + } + return SUCCESS; +} + + +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + fallback::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + + + + +/** + * This CPO (Customization Point Object) will help deserialize into + * smart pointers. + * + * If constructing T is nothrow, this conversion should be nothrow as well since + * we return MEMALLOC if we're not able to allocate memory instead of throwing + * the error message. + * + * @tparam T The type inside the smart pointer + * @tparam ValT document/value type + * @param val document/value + * @param out a reference to the smart pointer + * @return status of the conversion + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::element_type, ValT>) { + using element_type = typename std::remove_cvref_t::element_type; + + // For better error messages, don't use these as constraints on + // the tag_invoke CPO. + static_assert( + deserializable, + "The specified type inside the unique_ptr must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the unique_ptr must default constructible."); + + auto ptr = new (std::nothrow) element_type(); + if (ptr == nullptr) { + return MEMALLOC; + } + SIMDJSON_TRY(val.template get(*ptr)); + out.reset(ptr); + return SUCCESS; +} + +/** + * This CPO (Customization Point Object) will help deserialize into optional types. + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { + using value_type = typename std::remove_cvref_t::value_type; + + static_assert( + deserializable, + "The specified type inside the unique_ptr must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the unique_ptr must default constructible."); + + if (!out) { + out.emplace(); + } + SIMDJSON_TRY(val.template get(out.value())); + return SUCCESS; +} + +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_DESERIALIZE_H +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +/* end file simdjson/generic/ondemand/std_deserialize.h for fallback */ + +// Inline definitions +/* including simdjson/generic/ondemand/array-inl.h for fallback: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/jsonpathutil.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `{`. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth. at_start is always false. +// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. +// + +simdjson_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. + iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline simdjson_result array::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for fallback */ +/* including simdjson/generic/ondemand/array_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. + if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::array_iterator &&value +) noexcept + : fallback::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : fallback::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for fallback */ +/* including simdjson/generic/ondemand/value-inl.h for fallback: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result value::get_object() noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +template +simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { + return iter.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} + +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + + +template<> simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } +template<> simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } +template<> simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } +template<> simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } +template<> simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } +template<> simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } +template<> simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } +template<> simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } +template<> simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline value::operator T() noexcept(false) { + return get(); +} +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); +} + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} + +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); +} + +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result value::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); +} + +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +simdjson_inline simdjson_result value::raw_json() noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: { + ondemand::array array; + SIMDJSON_TRY(get_array().get(array)); + return array.raw_json(); + } + case json_type::object: { + ondemand::object object; + SIMDJSON_TRY(get_object().get(object)); + return object.raw_json(); + } + default: + return raw_json_token(); + } +} + +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} + +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + return false; + } + return true; +} + +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result value::at_path(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + return {}; +} + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} + +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template<> simdjson_inline error_code simdjson_result::get(fallback::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} + +template simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator fallback::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer( + std::string_view json_pointer) noexcept { + if (error()) { + return error(); + } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for fallback */ +/* including simdjson/generic/ondemand/document-inl.h for fallback: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); +} + +simdjson_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); +} + +inline void document::rewind() noexcept { + iter.rewind(); +} + +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); +} + +inline simdjson_result document::current_location() const noexcept { + return iter.current_location(); +} + +inline int32_t document::current_depth() const noexcept { + return iter.depth(); +} + +inline bool document::at_end() const noexcept { + return iter.at_end(); +} + + +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. + + // It is the convention throughout the code that the macro `SIMDJSON_DEVELOPMENT_CHECKS` determines whether + // we check for OUT_OF_ORDER_ITERATION. Proper on::demand code should never trigger this error. +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.at_root()) { return OUT_OF_ORDER_ITERATION; } +#endif + // assert_at_root() serves two purposes: in Debug mode, whether or not + // SIMDJSON_DEVELOPMENT_CHECKS is set or not, it checks that we are at the root of + // the document (this will typically be redundant). In release mode, it generates + // SIMDJSON_ASSUME statements to allow the compiler to make assumptions. + iter.assert_at_root(); + switch (*iter.peek()) { + case '[': { + // The following lines check that the document ends with ]. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_array(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + case '{': { + // The following lines would check that the document ends with }. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_object(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; + } +} +simdjson_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} + +/** + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. We want to disallow trailing + * content. + * Thus, in several implementations below, we pass a 'true' parameter value to + * a get_root_value_iterator() method: this indicates that we disallow trailing content. + */ + +simdjson_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(true); +} +simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(true); +} +simdjson_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(true); +} +simdjson_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(true); +} +simdjson_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(true); +} +simdjson_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(true); +} +simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(true, allow_replacement); +} +template +simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); +} +simdjson_inline simdjson_result document::get_wobbly_string() noexcept { + return get_root_value_iterator().get_root_wobbly_string(true); +} +simdjson_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(true); +} +simdjson_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(true); +} +simdjson_inline simdjson_result document::is_null() noexcept { + return get_root_value_iterator().is_root_null(true); +} + +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } +template<> simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } +template<> simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } +template<> simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } +template<> simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } +template<> simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } +template<> simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } +template<> simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } +template<> simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } + +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } + +#if SIMDJSON_EXCEPTIONS +template +simdjson_deprecated simdjson_inline document::operator T() && noexcept(false) { return get(); } +template +simdjson_inline document::operator T() & noexcept(false) { return get(); } +simdjson_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_inline document::operator object() & noexcept(false) { return get_object(); } +simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an object, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline error_code document::consume() noexcept { + bool scalar = false; + auto error = is_scalar().get(scalar); + if(error) { return error; } + if(scalar) { + iter.return_current_and_advance(); + return SUCCESS; + } + error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + // For more speed, we could do: + // return iter.is_single_token(); + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_inline error_code simdjson_result::get(fallback::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code simdjson_result::get(fallback::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); +} + + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} + +} // namespace simdjson + + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. + * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +template +simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + + + +namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +template <> +simdjson_inline error_code simdjson_result::get(fallback::ondemand::document_reference &out) & noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +template <> +simdjson_inline error_code simdjson_result::get(fallback::ondemand::document_reference &out) && noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator fallback::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} - return i; +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); } +} // namespace simdjson -// Parse any number from 0 to 18,446,744,073,709,551,615 -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for fallback */ +/* including simdjson/generic/ondemand/document_stream-inl.h for fallback: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } +#include +#include - return i; -} +namespace simdjson { +namespace fallback { +namespace ondemand { -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { - const uint8_t *p = src + 1; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +#ifdef SIMDJSON_THREADS_ENABLED - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - // Note: we use src[1] and not src[0] because src[0] is the quote character in this - // instance. - if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} - return i; + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } } -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +#endif // SIMDJSON_THREADS_ENABLED - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif } -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { - // - // Check for minus sign - // - if(src == src_end) { return NUMBER_ERROR; } - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ +} - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif +} - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; +inline size_t document_stream::size_in_bytes() const noexcept { + return len; } -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += uint8_t(negative) + 1; +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = src; - uint64_t i = 0; - while (parse_digit(*src, i)) { src++; } +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(src - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*src)) { - // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(*src != '"') { return NUMBER_ERROR; } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { } -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += uint8_t(negative); +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + return simdjson_result(stream->doc, stream->error); +} +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) // - // Parse the integer part. + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = p-start_digits > 19; - } - } else { - overflow = p-src > 19; - } +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. + return iterator(this, error == EMPTY); +} - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; - exponent += exp_neg ? 0-exp : exp; + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } } + #endif // SIMDJSON_THREADS_ENABLED +} - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + // Check if at end of structural indexes (i.e. at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. + */ - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), &d)) { - return NUMBER_ERROR; + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } } - return d; } -simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { - return (*src == '-'); +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { doc.iter.consume_character(','); } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); } -simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += uint8_t(negative); - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } - return false; +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } -simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += uint8_t(negative); - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { - // We have an integer. - // If the number is negative and valid, it must be a signed integer. - if(negative) { return number_type::signed_integer; } - // We want values larger or equal to 9223372036854775808 to be unsigned - // integers, and the other values to be signed integers. - int digit_count = int(p - src); - if(digit_count >= 19) { - const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); - if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { - return number_type::unsigned_integer; - } - } - return number_type::signed_integer; +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); } - // Hopefully, we have 'e' or 'E' or '.'. - return number_type::floating_point_number; } -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { - if(src == src_end) { return NUMBER_ERROR; } - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += uint8_t(negative); - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - if(p == src_end) { return NUMBER_ERROR; } - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while ((p != src_end) && parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely((p != src_end) && (*p == '.'))) { - p++; - const uint8_t *start_decimal_digits = p; - if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: We could remove trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } } - } else { - overflow = p-src > 19; + cur_struct_index++; } - // - // Parse the exponent - // - if ((p != src_end) && (*p == 'e' || *p == 'E')) { - p++; - if(p == src_end) { return NUMBER_ERROR; } - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while ((p != src_end) && parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} - exponent += exp_neg ? 0-exp : exp; - } +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} - if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } +#ifdef SIMDJSON_THREADS_ENABLED - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { - return NUMBER_ERROR; + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); } - return d; } -simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += uint8_t(negative) + 1; +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. + this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); +#endif // SIMDJSON_THREADS_ENABLED - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = p-start_digits > 19; - } - } else { - overflow = p-src > 19; - } +} // namespace ondemand +} // namespace fallback +} // namespace simdjson - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; +namespace simdjson { - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} - exponent += exp_neg ? 0-exp : exp; - } +} - if (*p != '"') { return NUMBER_ERROR; } +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for fallback */ +/* including simdjson/generic/ondemand/field-inl.h for fallback: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), &d)) { - return NUMBER_ERROR; - } - return d; -} +namespace simdjson { +namespace fallback { +namespace ondemand { -} // unnamed namespace -#endif // SIMDJSON_SKIPNUMBERPARSING +// clang 6 does not think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} -} // namespace numberparsing +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) +{ +} -inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { - switch (type) { - case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; - case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; - case number_type::floating_point_number: out << "floating-point number (binary64)"; break; - default: SIMDJSON_UNREACHABLE(); - } - return out; +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); } -} // namespace ppc64 -} // namespace simdjson +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); +} -#endif // SIMDJSON_GENERIC_NUMBERPARSING_H -/* end file simdjson/generic/numberparsing.h for ppc64 */ +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; +} -/* including simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return first; +} -namespace simdjson { -namespace ppc64 { -// -// internal::implementation_simdjson_result_base inline implementation -// +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +} -template -simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { - error = this->second; - if (!error) { - value = std::forward>(*this).first; - } +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); } -template -simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { - error_code error; - std::forward>(*this).tie(value, error); - return error; +simdjson_inline value &field::value() & noexcept { + return second; } -template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { - return this->second; +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; } -#if SIMDJSON_EXCEPTIONS +} // namespace ondemand +} // namespace fallback +} // namespace simdjson -template -simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return this->first; -} +namespace simdjson { -template -simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { - return std::forward>(*this).take_value(); +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ } - -template -simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(this->first); +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ } -template -simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { - return std::forward>(*this).take_value(); +simdjson_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); } -#endif // SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { + if (error()) { return error(); } + return first.key_raw_json_token(); +} -template -simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { - return this->first; +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { + if (error()) { return error(); } + return first.escaped_key(); } -template -simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { - return this->first; +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(allow_replacement); } -template -simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { - return std::forward(this->first); +template +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); } -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept - : first{std::forward(value)}, second{error} {} -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept - : implementation_simdjson_result_base(T{}, error) {} -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept - : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} +simdjson_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); +} -} // namespace ppc64 } // namespace simdjson -#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H -/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64 */ -/* end file simdjson/generic/amalgamated.h for ppc64 */ -/* including simdjson/ppc64/end.h: #include "simdjson/ppc64/end.h" */ -/* begin file simdjson/ppc64/end.h */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT -/* undefining SIMDJSON_IMPLEMENTATION from "ppc64" */ -#undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/ppc64/end.h */ - -#endif // SIMDJSON_PPC64_H -/* end file simdjson/ppc64.h */ -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) -/* including simdjson/westmere.h: #include "simdjson/westmere.h" */ -/* begin file simdjson/westmere.h */ -#ifndef SIMDJSON_WESTMERE_H -#define SIMDJSON_WESTMERE_H - -/* including simdjson/westmere/begin.h: #include "simdjson/westmere/begin.h" */ -/* begin file simdjson/westmere/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "westmere" */ -#define SIMDJSON_IMPLEMENTATION westmere -/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ -/* begin file simdjson/westmere/base.h */ -#ifndef SIMDJSON_WESTMERE_BASE_H -#define SIMDJSON_WESTMERE_BASE_H +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for fallback */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE namespace simdjson { -/** - * Implementation for Westmere (Intel SSE4.2). - */ -namespace westmere { - -class implementation; +namespace fallback { +namespace ondemand { -namespace { -namespace simd { +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; +} +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; +} -template struct simd8; -template struct simd8x64; +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} -} // namespace simd -} // unnamed namespace +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} -} // namespace westmere -} // namespace simdjson +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} -#endif // SIMDJSON_WESTMERE_BASE_H -/* end file simdjson/westmere/base.h */ -/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ -/* begin file simdjson/westmere/intrinsics.h */ -#ifndef SIMDJSON_WESTMERE_INTRINSICS_H -#define SIMDJSON_WESTMERE_INTRINSICS_H +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; +} -#if SIMDJSON_VISUAL_STUDIO -// under clang within visual studio, this will include -#include // visual studio or clang -#else -#include // elsewhere -#endif // SIMDJSON_VISUAL_STUDIO +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; +} -#if SIMDJSON_CLANG_VISUAL_STUDIO -/** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - */ -#include // for _mm_alignr_epi8 -#include // for _mm_clmulepi64_si128 -#endif +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth -static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! + } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. + logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } -#endif // SIMDJSON_WESTMERE_INTRINSICS_H -/* end file simdjson/westmere/intrinsics.h */ + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } -#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE -SIMDJSON_TARGET_REGION("sse4.2,pclmul,popcnt") -#endif + return report_error(TAPE_ERROR, "not enough close braces"); +} -/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ -/* begin file simdjson/westmere/bitmanipulation.h */ -#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H -#define SIMDJSON_WESTMERE_BITMANIPULATION_H +SIMDJSON_POP_DISABLE_WARNINGS -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); +} -namespace simdjson { -namespace westmere { -namespace { +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; +} -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -SIMDJSON_NO_SANITIZE_UNDEFINED -// This function can be used safely even if not all bytes have been -// initialized. -// See issue https://github.com/simdjson/simdjson/issues/1965 -SIMDJSON_NO_SANITIZE_MEMORY -simdjson_inline int trailing_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; } -/* result might be undefined when input_num is zero */ -simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return input_num & (input_num-1); +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; } -/* result might be undefined when input_num is zero */ -simdjson_inline int leading_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); } -#if SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows in this kernel - return __popcnt64(input_num);// Visual Studio wants two underscores +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. + SIMDJSON_ASSUME( token.position() == _root ); +#endif } -#else -simdjson_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); } -#endif -simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); -#else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); #endif } -} // unnamed namespace -} // namespace westmere -} // namespace simdjson +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; +} -#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H -/* end file simdjson/westmere/bitmanipulation.h */ -/* including simdjson/westmere/bitmask.h: #include "simdjson/westmere/bitmask.h" */ -/* begin file simdjson/westmere/bitmask.h */ -#ifndef SIMDJSON_WESTMERE_BITMASK_H -#define SIMDJSON_WESTMERE_BITMASK_H +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); +} -namespace simdjson { -namespace westmere { -namespace { +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} -// -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. -// -// For example, prefix_xor(00100100) == 00011100 -// -simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { - // There should be no such thing with a processing supporting avx2 - // but not clmul. - __m128i all_ones = _mm_set1_epi8('\xFF'); - __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); - return _mm_cvtsi128_si64(result); +simdjson_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; } -} // unnamed namespace -} // namespace westmere -} // namespace simdjson +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} -#endif // SIMDJSON_WESTMERE_BITMASK_H -/* end file simdjson/westmere/bitmask.h */ -/* including simdjson/westmere/numberparsing_defs.h: #include "simdjson/westmere/numberparsing_defs.h" */ -/* begin file simdjson/westmere/numberparsing_defs.h */ -#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H -#define SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); +} -/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ -/* begin file simdjson/westmere/base.h */ -#ifndef SIMDJSON_WESTMERE_BASE_H -#define SIMDJSON_WESTMERE_BASE_H +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); +} -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE -namespace simdjson { -/** - * Implementation for Westmere (Intel SSE4.2). - */ -namespace westmere { +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); +} -class implementation; +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); +} -namespace { -namespace simd { +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); +} -template struct simd8; -template struct simd8x64; +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} -} // namespace simd -} // unnamed namespace +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; +} -} // namespace westmere -} // namespace simdjson +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; +} -#endif // SIMDJSON_WESTMERE_BASE_H -/* end file simdjson/westmere/base.h */ -/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ -/* begin file simdjson/westmere/intrinsics.h */ -#ifndef SIMDJSON_WESTMERE_INTRINSICS_H -#define SIMDJSON_WESTMERE_INTRINSICS_H +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; +} + +simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); +} -#if SIMDJSON_VISUAL_STUDIO -// under clang within visual studio, this will include -#include // visual studio or clang +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; #else -#include // elsewhere -#endif // SIMDJSON_VISUAL_STUDIO - + return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif +} -#if SIMDJSON_CLANG_VISUAL_STUDIO -/** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - */ -#include // for _mm_alignr_epi8 -#include // for _mm_clmulepi64_si128 +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape_wobbly(in, _string_buf_loc); #endif +} -static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; +} -#endif // SIMDJSON_WESTMERE_INTRINSICS_H -/* end file simdjson/westmere/intrinsics.h */ +simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_DEVELOPMENT_CHECKS -namespace simdjson { -namespace westmere { -namespace numberparsing { +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; +} -/** @private */ -static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - // this actually computes *16* values so we are being wasteful. - const __m128i ascii0 = _mm_set1_epi8('0'); - const __m128i mul_1_10 = - _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); - const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); - const __m128i mul_1_10000 = - _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); - const __m128i input = _mm_sub_epi8( - _mm_loadu_si128(reinterpret_cast(chars)), ascii0); - const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); - const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); - const __m128i t3 = _mm_packus_epi32(t2, t2); - const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); - return _mm_cvtsi128_si32( - t4); // only captures the sum of the first 8 digits, drop the rest +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } } -/** @private */ -simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { - internal::value128 answer; -#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); #endif - return answer; + + +simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; } -} // namespace numberparsing -} // namespace westmere + +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. + std::memset(tmpbuf + max_len, ' ', N - max_len); + } + return true; +} + +} // namespace ondemand +} // namespace fallback } // namespace simdjson -#define SIMDJSON_SWAR_NUMBER_PARSING 1 +namespace simdjson { -#endif // SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H -/* end file simdjson/westmere/numberparsing_defs.h */ -/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ -/* begin file simdjson/westmere/simd.h */ -#ifndef SIMDJSON_WESTMERE_SIMD_H -#define SIMDJSON_WESTMERE_SIMD_H +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for fallback */ +/* including simdjson/generic/ondemand/json_type-inl.h for fallback: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { -namespace { -namespace simd { - - template - struct base { - __m128i value; - - // Zero constructor - simdjson_inline base() : value{__m128i()} {} +namespace fallback { +namespace ondemand { - // Conversion from SIMD register - simdjson_inline base(const __m128i _value) : value(_value) {} +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} - // Conversion to SIMD register - simdjson_inline operator const __m128i&() const { return this->value; } - simdjson_inline operator __m128i&() { return this->value; } +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); +} +#endif - // Bit operations - simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } - simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } - simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } - simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } - simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } - }; - template> - struct base8: base> { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - simdjson_inline base8() : base>() {} - simdjson_inline base8(const __m128i _value) : base>(_value) {} +simdjson_inline number_type number::get_number_type() const noexcept { + return type; +} - friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; +} - static const int SIZE = sizeof(base>::value); +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; +} - template - simdjson_inline simd8 prev(const simd8 prev_chunk) const { - return _mm_alignr_epi8(*this, prev_chunk, 16 - N); - } - }; +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} - // SIMD byte mask type (returned by things like eq and gt) - template<> - struct simd8: base8 { - static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } +simdjson_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m128i _value) : base8(_value) {} - // Splat constructor - simdjson_inline simd8(bool _value) : base8(splat(_value)) {} +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} - simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } - simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } - simdjson_inline simd8 operator~() const { return *this ^ true; } - }; +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); +} - template - struct base8_numeric: base8 { - static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } - static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } - static simdjson_inline simd8 load(const T values[16]) { - return _mm_loadu_si128(reinterpret_cast(values)); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_inline simd8 repeat_16( - T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; +} - simdjson_inline base8_numeric() : base8() {} - simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} - // Store to array - simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } +simdjson_inline number::operator double() const noexcept { + return get_double(); +} - // Override to distinguish from bool version - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); +} - // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } - simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; +} - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) - template - simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm_shuffle_epi8(lookup_table, *this); - } +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). - // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes - // get written. - // Design consideration: it seems like a function with the - // signature simd8 compress(uint32_t mask) would be - // sensible, but the AVX ISA makes this kind of approach difficult. - template - simdjson_inline void compress(uint16_t mask, L * output) const { - using internal::thintable_epi8; - using internal::BitsSetTable256mul2; - using internal::pshufb_combine_table; - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); - // we increment by 0x08 the second half of the mask - shufmask = - _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); - // this is the version "nearly pruned" - __m128i pruned = _mm_shuffle_epi8(*this, shufmask); - // we still need to put the two halves together. - // we compute the popcount of the first half: - int pop1 = BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - __m128i compactmask = - _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); - __m128i answer = _mm_shuffle_epi8(pruned, compactmask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); - } +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; +} - template - simdjson_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, replace15 - )); - } - }; +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} - // Signed bytes - template<> - struct simd8 : base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) : simd8(_mm_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +} // namespace ondemand +} // namespace fallback +} // namespace simdjson - // Order-sensitive comparisons - simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } - simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } - simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } - }; +namespace simdjson { - // Unsigned bytes - template<> - struct simd8: base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) : simd8(_mm_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} - // Saturated math - simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } - simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } +} // namespace simdjson - // Order-specific operations - simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for fallback */ +/* including simdjson/generic/ondemand/logger-inl.h for fallback: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H - // Bit-specific operations - simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } - simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } - simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } - simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } - simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } - template - simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } - template - simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } - // Get one of the bits and make a bitmask out of it. - // e.g. value.get_bit<7>() gets the high bit - template - simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } - }; +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - template - struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; +#include +#include - simd8x64(const simd8x64& o) = delete; // no copy allowed - simd8x64& operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed +namespace simdjson { +namespace fallback { +namespace ondemand { +namespace logger { - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. - simdjson_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr+sizeof(simd8)*0); - this->chunks[1].store(ptr+sizeof(simd8)*1); - this->chunks[2].store(ptr+sizeof(simd8)*2); - this->chunks[3].store(ptr+sizeof(simd8)*3); - } +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } +} - simdjson_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); - } +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); +} - simdjson_inline uint64_t compress(uint64_t mask, T * output) const { - this->chunks[0].compress(uint16_t(mask), output); - this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); - this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); - this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); - return 64 - count_ones(mask); - } +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; +} - simdjson_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); - uint64_t r1 = this->chunks[1].to_bitmask() ; - uint64_t r2 = this->chunks[2].to_bitmask() ; - uint64_t r3 = this->chunks[3].to_bitmask() ; - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); - } +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; +} - simdjson_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] == mask, - this->chunks[1] == mask, - this->chunks[2] == mask, - this->chunks[3] == mask - ).to_bitmask(); - } +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); +} - simdjson_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64( - this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1], - this->chunks[2] == other.chunks[2], - this->chunks[3] == other.chunks[3] - ).to_bitmask(); - } +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} - simdjson_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] <= mask, - this->chunks[1] <= mask, - this->chunks[2] <= mask, - this->chunks[3] <= mask - ).to_bitmask(); - } - }; // struct simd8x64 +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); +} +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} -} // namespace simd -} // unnamed namespace -} // namespace westmere -} // namespace simdjson +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} -#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H -/* end file simdjson/westmere/simd.h */ -/* including simdjson/westmere/stringparsing_defs.h: #include "simdjson/westmere/stringparsing_defs.h" */ -/* begin file simdjson/westmere/stringparsing_defs.h */ -#ifndef SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H -#define SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} -/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ -/* begin file simdjson/westmere/bitmanipulation.h */ -#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H -#define SIMDJSON_WESTMERE_BITMANIPULATION_H +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); +} -namespace simdjson { -namespace westmere { -namespace { +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); +} -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -SIMDJSON_NO_SANITIZE_UNDEFINED -// This function can be used safely even if not all bytes have been -// initialized. -// See issue https://github.com/simdjson/simdjson/issues/1965 -SIMDJSON_NO_SANITIZE_MEMORY -simdjson_inline int trailing_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); } -/* result might be undefined when input_num is zero */ -simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return input_num & (input_num-1); +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); } -/* result might be undefined when input_num is zero */ -simdjson_inline int leading_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); } -#if SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows in this kernel - return __popcnt64(input_num);// Visual Studio wants two underscores +inline void log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } } -#else -simdjson_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); + +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); } -#endif -simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); -#else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); -#endif +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. + printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } } -} // unnamed namespace -} // namespace westmere +} // namespace logger +} // namespace ondemand +} // namespace fallback } // namespace simdjson -#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H -/* end file simdjson/westmere/bitmanipulation.h */ -/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ -/* begin file simdjson/westmere/simd.h */ -#ifndef SIMDJSON_WESTMERE_SIMD_H -#define SIMDJSON_WESTMERE_SIMD_H +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for fallback */ +/* including simdjson/generic/ondemand/object-inl.h for fallback: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { -namespace { -namespace simd { - - template - struct base { - __m128i value; - - // Zero constructor - simdjson_inline base() : value{__m128i()} {} - - // Conversion from SIMD register - simdjson_inline base(const __m128i _value) : value(_value) {} - - // Conversion to SIMD register - simdjson_inline operator const __m128i&() const { return this->value; } - simdjson_inline operator __m128i&() { return this->value; } +namespace fallback { +namespace ondemand { - // Bit operations - simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } - simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } - simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } - simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } - simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } - }; +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} - template> - struct base8: base> { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); +} +simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; +} - simdjson_inline base8() : base>() {} - simdjson_inline base8(const __m128i _value) : base>(_value) {} +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} - friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); +} - static const int SIZE = sizeof(base>::value); +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; +} - template - simdjson_inline simd8 prev(const simd8 prev_chunk) const { - return _mm_alignr_epi8(*this, prev_chunk, 16 - N); - } - }; +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} - // SIMD byte mask type (returned by things like eq and gt) - template<> - struct simd8: base8 { - static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); +} - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m128i _value) : base8(_value) {} - // Splat constructor - simdjson_inline simd8(bool _value) : base8(splat(_value)) {} +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; - simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } - simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } - simdjson_inline simd8 operator~() const { return *this ^ true; } - }; + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} - template - struct base8_numeric: base8 { - static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } - static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } - static simdjson_inline simd8 load(const T values[16]) { - return _mm_loadu_si128(reinterpret_cast(values)); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_inline simd8 repeat_16( - T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); +} - simdjson_inline base8_numeric() : base8() {} - simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; +} - // Store to array - simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} - // Override to distinguish from bool version - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); +} - // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } - simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } +} // namespace ondemand +} // namespace fallback +} // namespace simdjson - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) - template - simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm_shuffle_epi8(lookup_table, *this); - } +namespace simdjson { - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). - // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes - // get written. - // Design consideration: it seems like a function with the - // signature simd8 compress(uint32_t mask) would be - // sensible, but the AVX ISA makes this kind of approach difficult. - template - simdjson_inline void compress(uint16_t mask, L * output) const { - using internal::thintable_epi8; - using internal::BitsSetTable256mul2; - using internal::pshufb_combine_table; - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); - // we increment by 0x08 the second half of the mask - shufmask = - _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); - // this is the version "nearly pruned" - __m128i pruned = _mm_shuffle_epi8(*this, shufmask); - // we still need to put the two halves together. - // we compute the popcount of the first half: - int pop1 = BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - __m128i compactmask = - _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); - __m128i answer = _mm_shuffle_epi8(pruned, compactmask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); - } +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} - template - simdjson_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, replace15 - )); - } - }; +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); +} - // Signed bytes - template<> - struct simd8 : base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) : simd8(_mm_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} - // Order-sensitive comparisons - simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } - simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } - simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } - }; +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} - // Unsigned bytes - template<> - struct simd8: base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) : simd8(_mm_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); +} - // Saturated math - simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } - simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); +} - // Order-specific operations - simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} - // Bit-specific operations - simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } - simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } - simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } - simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } - simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } - template - simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } - template - simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } - // Get one of the bits and make a bitmask out of it. - // e.g. value.get_bit<7>() gets the high bit - template - simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } - }; +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson - template - struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for fallback */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H - simd8x64(const simd8x64& o) = delete; // no copy allowed - simd8x64& operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} +namespace simdjson { +namespace fallback { +namespace ondemand { - simdjson_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr+sizeof(simd8)*0); - this->chunks[1].store(ptr+sizeof(simd8)*1); - this->chunks[2].store(ptr+sizeof(simd8)*2); - this->chunks[3].store(ptr+sizeof(simd8)*3); - } +// +// object_iterator +// - simdjson_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); - } +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} - simdjson_inline uint64_t compress(uint64_t mask, T * output) const { - this->chunks[0].compress(uint16_t(mask), output); - this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); - this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); - this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); - return 64 - count_ones(mask); - } +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); +} - simdjson_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); - uint64_t r1 = this->chunks[1].to_bitmask() ; - uint64_t r2 = this->chunks[2].to_bitmask() ; - uint64_t r3 = this->chunks[3].to_bitmask() ; - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); - } +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error - simdjson_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] == mask, - this->chunks[1] == mask, - this->chunks[2] == mask, - this->chunks[3] == mask - ).to_bitmask(); - } + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } - simdjson_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64( - this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1], - this->chunks[2] == other.chunks[2], - this->chunks[3] == other.chunks[3] - ).to_bitmask(); - } + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; +} +SIMDJSON_POP_DISABLE_WARNINGS - simdjson_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] <= mask, - this->chunks[1] <= mask, - this->chunks[2] <= mask, - this->chunks[3] <= mask - ).to_bitmask(); - } - }; // struct simd8x64 +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// -} // namespace simd -} // unnamed namespace -} // namespace westmere +} // namespace ondemand +} // namespace fallback } // namespace simdjson -#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H -/* end file simdjson/westmere/simd.h */ - namespace simdjson { -namespace westmere { -namespace { - -using namespace simd; - -// Holds backslashes and quotes locations. -struct backslash_and_quote { -public: - static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - - simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_inline bool has_backslash() { return bs_bits != 0; } - simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } - uint32_t bs_bits; - uint32_t quote_bits; -}; // struct backslash_and_quote +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} -simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // this can read up to 31 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); - simd8 v0(src); - simd8 v1(src + 16); - v0.store(dst); - v1.store(dst + 16); - uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); - return { - uint32_t(bs_and_quote), // bs_bits - uint32_t(bs_and_quote >> 32) // quote_bits - }; +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; } -} // unnamed namespace -} // namespace westmere } // namespace simdjson -#endif // SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H -/* end file simdjson/westmere/stringparsing_defs.h */ -/* end file simdjson/westmere/begin.h */ -/* including simdjson/generic/amalgamated.h for westmere: #include "simdjson/generic/amalgamated.h" */ -/* begin file simdjson/generic/amalgamated.h for westmere */ -#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) -#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! -#endif - -/* including simdjson/generic/base.h for westmere: #include "simdjson/generic/base.h" */ -/* begin file simdjson/generic/base.h for westmere */ -#ifndef SIMDJSON_GENERIC_BASE_H +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for fallback */ +/* including simdjson/generic/ondemand/parser-inl.h for fallback: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ -/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ -/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ -/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ -/* amalgamation skipped (editor-only): #else */ -/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ -/* amalgamation skipped (editor-only): #endif */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace fallback { +namespace ondemand { -struct open_container; -class dom_parser_implementation; +simdjson_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { +} -/** - * The type of a JSON number - */ -enum class number_type { - floating_point_number=1, /// a binary64 number - signed_integer, /// a signed integer that fits in a 64-bit word using two's complement - unsigned_integer /// a positive integer larger or equal to 1<<63 -}; +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } -} // namespace westmere -} // namespace simdjson + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif -#endif // SIMDJSON_GENERIC_BASE_H -/* end file simdjson/generic/base.h for westmere */ -/* including simdjson/generic/jsoncharutils.h for westmere: #include "simdjson/generic/jsoncharutils.h" */ -/* begin file simdjson/generic/jsoncharutils.h for westmere */ -#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + json.remove_utf8_bom(); -namespace simdjson { -namespace westmere { -namespace { -namespace jsoncharutils { + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } -// return non-zero if not a structural or whitespace char -// zero otherwise -simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace_negated[c]; + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); } -simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace[c]; -} +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } -// returns a value with the high 16 bits set if not valid -// otherwise returns the conversion of the 4 hex digits at src into the bottom -// 16 bits of the 32-bit return register -// -// see -// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ -static inline uint32_t hex_to_u32_nocheck( - const uint8_t *src) { // strictly speaking, static inline is a C-ism - uint32_t v1 = internal::digit_to_val32[630 + src[0]]; - uint32_t v2 = internal::digit_to_val32[420 + src[1]]; - uint32_t v3 = internal::digit_to_val32[210 + src[2]]; - uint32_t v4 = internal::digit_to_val32[0 + src[3]]; - return v1 | v2 | v3 | v4; -} + json.remove_utf8_bom(); -// given a code point cp, writes to c -// the utf-8 code, outputting the length in -// bytes, if the length is zero, the code point -// is invalid -// -// This can possibly be made faster using pdep -// and clz and table lookups, but JSON documents -// have few escaped code points, and the following -// function looks cheap. -// -// Note: we assume that surrogates are treated separately -// -simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { - if (cp <= 0x7F) { - c[0] = uint8_t(cp); - return 1; // ascii + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); } - if (cp <= 0x7FF) { - c[0] = uint8_t((cp >> 6) + 192); - c[1] = uint8_t((cp & 63) + 128); - return 2; // universal plane - // Surrogates are treated elsewhere... - //} //else if (0xd800 <= cp && cp <= 0xdfff) { - // return 0; // surrogates // could put assert here - } else if (cp <= 0xFFFF) { - c[0] = uint8_t((cp >> 12) + 224); - c[1] = uint8_t(((cp >> 6) & 63) + 128); - c[2] = uint8_t((cp & 63) + 128); - return 3; - } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed - c[0] = uint8_t((cp >> 18) + 240); - c[1] = uint8_t(((cp >> 12) & 63) + 128); - c[2] = uint8_t(((cp >> 6) & 63) + 128); - c[3] = uint8_t((cp & 63) + 128); - return 4; + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; } - // will return 0 when the code point was too large. - return 0; // bad r + return document::start({ reinterpret_cast(json.data()), this, true }); } +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -#if SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -#endif -} // namespace jsoncharutils -} // unnamed namespace -} // namespace westmere -} // namespace simdjson +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); +} -#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H -/* end file simdjson/generic/jsoncharutils.h for westmere */ -/* including simdjson/generic/atomparsing.h for westmere: #include "simdjson/generic/atomparsing.h" */ -/* begin file simdjson/generic/atomparsing.h for westmere */ -#ifndef SIMDJSON_GENERIC_ATOMPARSING_H +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { + if(json.capacity() - json.size() < SIMDJSON_PADDING) { + json.reserve(json.size() + SIMDJSON_PADDING); + } + return iterate(padded_string_view(json)); +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); +} -#include +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); +} -namespace simdjson { -namespace westmere { -namespace { -/// @private -namespace atomparsing { +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); +} -// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. -// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot -// be certain that the character pointer will be properly aligned. -// You might think that using memcpy makes this function expensive, but you'd be wrong. -// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); -// to the compile-time constant 1936482662. -simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + json.remove_utf8_bom(); -// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. -// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. -simdjson_warn_unused -simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { - uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) - static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); - std::memcpy(&srcval, src, sizeof(uint32_t)); - return srcval ^ string_to_uint32(atom); -} + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src) { - return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); } -simdjson_warn_unused -simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_true_atom(src); } - else if (len == 4) { return !str4ncmp(src, "true"); } - else { return false; } +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; + } + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src) { - return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; } -simdjson_warn_unused -simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { - if (len > 5) { return is_valid_false_atom(src); } - else if (len == 5) { return !str4ncmp(src+1, "alse"); } - else { return false; } +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; + } } -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src) { - return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -simdjson_warn_unused -simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_null_atom(src); } - else if (len == 4) { return !str4ncmp(src, "null"); } - else { return false; } +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -} // namespace atomparsing -} // unnamed namespace -} // namespace westmere +} // namespace ondemand +} // namespace fallback } // namespace simdjson -#endif // SIMDJSON_GENERIC_ATOMPARSING_H -/* end file simdjson/generic/atomparsing.h for westmere */ -/* including simdjson/generic/dom_parser_implementation.h for westmere: #include "simdjson/generic/dom_parser_implementation.h" */ -/* begin file simdjson/generic/dom_parser_implementation.h for westmere */ -#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - namespace simdjson { -namespace westmere { - -// expectation: sizeof(open_container) = 64/8. -struct open_container { - uint32_t tape_index; // where, on the tape, does the scope ([,{) begins - uint32_t count; // how many elements in the scope -}; // struct open_container - -static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); - -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - /** Tape location of each open { or [ */ - std::unique_ptr open_containers{}; - /** Whether each open container is a [ or { */ - std::unique_ptr is_array{}; - /** Buffer passed to stage 1 */ - const uint8_t *buf{}; - /** Length passed to stage 1 */ - size_t len{0}; - /** Document passed to stage 2 */ - dom::document *doc{}; - inline dom_parser_implementation() noexcept; - inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; - inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} - simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; - simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; - simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; - simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; - inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; - inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; -private: - simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); +} // namespace simdjson -}; +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for fallback */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for fallback: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H -} // namespace westmere -} // namespace simdjson +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { -inline dom_parser_implementation::dom_parser_implementation() noexcept = default; -inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; -inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; +namespace fallback { +namespace ondemand { -// Leaving these here so they can be inlined if so desired -inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } - // Stage 1 index output - size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; - structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); - if (!structural_indexes) { _capacity = 0; return MEMALLOC; } - structural_indexes[0] = 0; - n_structural_indexes = 0; +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} - _capacity = capacity; - return SUCCESS; -} +simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } -inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - // Stage 2 stacks - open_containers.reset(new (std::nothrow) open_container[max_depth]); - is_array.reset(new (std::nothrow) bool[max_depth]); - if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } - _max_depth = max_depth; - return SUCCESS; +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; } -} // namespace westmere -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H -/* end file simdjson/generic/dom_parser_implementation.h for westmere */ -/* including simdjson/generic/implementation_simdjson_result_base.h for westmere: #include "simdjson/generic/implementation_simdjson_result_base.h" */ -/* begin file simdjson/generic/implementation_simdjson_result_base.h for westmere */ -#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -namespace simdjson { -namespace westmere { +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +} -// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair -// so we can avoid inlining errors -// TODO reconcile these! -/** - * The result of a simdjson operation that could fail. - * - * Gives the option of reading error codes, or throwing an exception by casting to the desired result. - * - * This is a base class for implementations that want to add functions to the result type for - * chaining. - * - * Override like: - * - * struct simdjson_result : public internal::implementation_simdjson_result_base { - * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} - * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} - * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} - * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} - * // Your extra methods here - * } - * - * Then any method returning simdjson_result will be chainable with your methods. - */ -template -struct implementation_simdjson_result_base { +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters("), and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} - /** - * Create a new empty result with error = UNINITIALIZED. - */ - simdjson_inline implementation_simdjson_result_base() noexcept = default; +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} - /** - * Create a new error result. - */ - simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; - /** - * Create a new successful result. - */ - simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} - /** - * Create a new result with both things (use if you don't want to branch when creating the result). - */ - simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} - /** - * Move the value and the error to the provided variables. - * - * @param value The variable to assign the value to. May not be set if there is an error. - * @param error The variable to assign the error to. Set to SUCCESS if there is no error. - */ - simdjson_inline void tie(T &value, error_code &error) && noexcept; +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} - /** - * Move the value to the provided variable. - * - * @param value The variable to assign the value to. May not be set if there is an error. - */ - simdjson_inline error_code get(T &value) && noexcept; +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} - /** - * The error. - */ - simdjson_inline error_code error() const noexcept; +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} -#if SIMDJSON_EXCEPTIONS +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} - /** - * Get the result value. - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T& value() & noexcept(false); - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T&& value() && noexcept(false); +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline T&& take_value() && noexcept(false); +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} - /** - * Cast to the value (will throw on error). - * - * @throw simdjson_error if there was an error. - */ - simdjson_inline operator T&&() && noexcept(false); +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; + } +} +} // namespace ondemand +} // namespace fallback +} // namespace simdjson -#endif // SIMDJSON_EXCEPTIONS +namespace simdjson { - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline const T& value_unsafe() const& noexcept; - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline T& value_unsafe() & noexcept; - /** - * Take the result value (move it). This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_inline T&& value_unsafe() && noexcept; -protected: - /** users should never directly access first and second. **/ - T first{}; /** Users should never directly access 'first'. **/ - error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ -}; // struct implementation_simdjson_result_base +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -} // namespace westmere +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(fallback::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(fallback::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} } // namespace simdjson -#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H -/* end file simdjson/generic/implementation_simdjson_result_base.h for westmere */ -/* including simdjson/generic/numberparsing.h for westmere: #include "simdjson/generic/numberparsing.h" */ -/* begin file simdjson/generic/numberparsing.h for westmere */ -#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for fallback */ +/* including simdjson/generic/ondemand/serialization-inl.h for fallback: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include -#include -#include - namespace simdjson { -namespace westmere { -namespace numberparsing { -#ifdef JSON_TEST_NUMBERS -#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) -#else -#define INVALID_NUMBER(SRC) (NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) -#endif +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); +} -namespace { -// Convert a mantissa, an exponent and a sign bit into an ieee64 double. -// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). -// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. -simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { - double d; - mantissa &= ~(1ULL << 52); - mantissa |= real_exponent << 52; - mantissa |= ((static_cast(negative)) << 63); - std::memcpy(&d, &mantissa, sizeof(d)); - return d; +inline simdjson_result to_json_string(fallback::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -// Attempts to compute i * 10^(power) exactly; and if "negative" is -// true, negate the result. -// This function will only work in some cases, when it does not work, success is -// set to false. This should work *most of the time* (like 99% of the time). -// We assume that power is in the [smallest_power, -// largest_power] interval: the caller is responsible for this check. -simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { - // we start with a fast path - // It was described in - // Clinger WD. How to read floating point numbers accurately. - // ACM SIGPLAN Notices. 1990 -#ifndef FLT_EVAL_METHOD -#error "FLT_EVAL_METHOD should be defined, please include cfloat." -#endif -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - // We cannot be certain that x/y is rounded to nearest. - if (0 <= power && power <= 22 && i <= 9007199254740991) -#else - if (-22 <= power && power <= 22 && i <= 9007199254740991) -#endif +inline simdjson_result to_json_string(fallback::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(fallback::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. + */ + using namespace fallback::ondemand; + fallback::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) { - // convert the integer into a double. This is lossless since - // 0 <= i <= 2^53 - 1. - d = double(i); - // - // The general idea is as follows. - // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then - // 1) Both s and p can be represented exactly as 64-bit floating-point - // values - // (binary64). - // 2) Because s and p can be represented exactly as floating-point values, - // then s * p - // and s / p will produce correctly rounded values. - // - if (power < 0) { - d = d / simdjson::internal::power_of_ten[-power]; - } else { - d = d * simdjson::internal::power_of_ten[power]; + case json_type::array: + { + fallback::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); } - if (negative) { - d = -d; + case json_type::object: + { + fallback::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); } - return true; + default: + return trim(x.raw_json_token()); } - // When 22 < power && power < 22 + 16, we could - // hope for another, secondary fast path. It was - // described by David M. Gay in "Correctly rounded - // binary-decimal and decimal-binary conversions." (1990) - // If you need to compute i * 10^(22 + x) for x < 16, - // first compute i * 10^x, if you know that result is exact - // (e.g., when i * 10^x < 2^53), - // then you can still proceed and do (i * 10^x) * 10^22. - // Is this worth your time? - // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) - // for this second fast path to work. - // If you you have 22 < power *and* power < 22 + 16, and then you - // optimistically compute "i * 10^(x-22)", there is still a chance that you - // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of - // this optimization maybe less common than we would like. Source: - // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ - // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html - - // The fast path has now failed, so we are failing back on the slower path. +} - // In the slow path, we need to adjust i so that it is > 1<<63 which is always - // possible, except if i == 0, so we handle i == 0 separately. - if(i == 0) { - d = negative ? -0.0 : 0.0; - return true; - } +inline simdjson_result to_json_string(fallback::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} +inline simdjson_result to_json_string(fallback::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} - // The exponent is 1024 + 63 + power - // + floor(log(5**power)/log(2)). - // The 1024 comes from the ieee64 standard. - // The 63 comes from the fact that we use a 64-bit word. - // - // Computing floor(log(5**power)/log(2)) could be - // slow. Instead we use a fast function. - // - // For power in (-400,350), we have that - // (((152170 + 65536) * power ) >> 16); - // is equal to - // floor(log(5**power)/log(2)) + power when power >= 0 - // and it is equal to - // ceil(log(5**-power)/log(2)) + power when power < 0 - // - // The 65536 is (1<<16) and corresponds to - // (65536 * power) >> 16 ---> power - // - // ((152170 * power ) >> 16) is equal to - // floor(log(5**power)/log(2)) - // - // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). - // The 1<<16 value is a power of two; we could use a - // larger power of 2 if we wanted to. - // - int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} - // We want the most significant bit of i to be 1. Shift if needed. - int lz = leading_zeroes(i); - i <<= lz; +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} - // We are going to need to do some 64-bit arithmetic to get a precise product. - // We use a table lookup approach. - // It is safe because - // power >= smallest_power - // and power <= largest_power - // We recover the mantissa of the power, it has a leading 1. It is always - // rounded down. - // - // We want the most significant 64 bits of the product. We know - // this will be non-zero because the most significant bit of i is - // 1. - const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); - // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); - // Both i and power_of_five_128[index] have their most significant bit set to 1 which - // implies that the either the most or the second most significant bit of the product - // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason about the product: there - // is 0 or 1 leading zero in the product. +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +} // namespace simdjson - // Unless the least significant 9 bits of the high (64-bit) part of the full - // product are all 1s, then we know that the most significant 55 bits are - // exact and no further work is needed. Having 55 bits is necessary because - // we need 53 bits for the mantissa but we have to have one rounding bit and - // we can waste a bit if the most significant bit of the product is zero. - if((firstproduct.high & 0x1FF) == 0x1FF) { - // We want to compute i * 5^q, but only care about the top 55 bits at most. - // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing - // the full computation is wasteful. So we do what is called a "truncated - // multiplication". - // We take the most significant 64-bits, and we put them in - // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q - // to the desired approximation using one multiplication. Sometimes it does not suffice. - // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and - // then we get a better approximation to i * 5^q. - // - // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat - // more complicated. - // - // There is an extra layer of complexity in that we need more than 55 bits of - // accuracy in the round-to-even scenario. - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); - firstproduct.low += secondproduct.high; - if(secondproduct.high > firstproduct.low) { firstproduct.high++; } - // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without - // Fallback" (https://arxiv.org/abs/2212.06644), at this point we are sure that the product - // is sufficiently accurate, and more computation is not needed. - } - uint64_t lower = firstproduct.low; - uint64_t upper = firstproduct.high; - // The final mantissa should be 53 bits with a leading 1. - // We shift it so that it occupies 54 bits with a leading 1. - /////// - uint64_t upperbit = upper >> 63; - uint64_t mantissa = upper >> (upperbit + 9); - lz += int(1 ^ upperbit); +namespace simdjson { namespace fallback { namespace ondemand { - // Here we have mantissa < (1<<54). - int64_t real_exponent = exponent - lz; - if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 - if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = negative ? -0.0 : 0.0; - return true; - } - // next line is safe because -real_exponent + 1 < 0 - mantissa >>= -real_exponent + 1; - // Thankfully, we can't have both "round-to-even" and subnormals because - // "round-to-even" only occurs for powers close to 0. - mantissa += (mantissa & 1); // round up - mantissa >>= 1; - // There is a weird scenario where we don't have a subnormal but just. - // Suppose we start with 2.2250738585072013e-308, we end up - // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal - // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round - // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer - // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. - real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; - d = to_double(mantissa, real_exponent, negative); - return true; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); } - // We have to round to even. The "to even" part - // is only a problem when we are right in between two floats - // which we guard against. - // If we have lots of trailing zeros, we may fall right between two - // floating-point values. - // - // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] - // times a power of two. That is, it is right between a number with binary significand - // m and another number with binary significand m+1; and it must be the case - // that it cannot be represented by a float itself. - // - // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. - // Recall that 10^q = 5^q * 2^q. - // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that - // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. - // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so - // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have - // 2^{53} x 5^{-q} < 2^{64}. - // Hence we have 5^{-q} < 2^{11}$ or q>= -4. - // - // We require lower <= 1 and not lower == 0 because we could not prove that - // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. - if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { - if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { - mantissa &= ~1; // flip it so that we do not round up - } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); } +} +#endif - mantissa += mantissa & 1; - mantissa >>= 1; - - // Here we have mantissa < (1<<53), unless there was an overflow - if (mantissa >= (1ULL << 53)) { - ////////// - // This will happen when parsing values such as 7.2057594037927933e+16 - //////// - mantissa = (1ULL << 52); - real_exponent++; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); } - mantissa &= ~(1ULL << 52); - // we have to check that real_exponent is in range, otherwise we bail out - if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinite value!!! We could actually throw an error here if we could. - return false; +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); } - d = to_double(mantissa, real_exponent, negative); - return true; } +#endif -// We call a fallback floating-point parser that might be slow. Note -// it will accept JSON numbers, but the JSON spec. is more restrictive so -// before you call parse_float_fallback, you need to have validated the input -// string with the JSON grammar. -// It will return an error (false) if the parsed number is infinite. -// The string parsing itself always succeeds. We know that there is at least -// one digit. -static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); - // We do not accept infinite values. - - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } - -static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); - // We do not accept infinite values. - - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } - -// check quickly whether the next 8 chars are made of digits -// at a glance, it looks better than Mula's -// http://0x80.pl/articles/swar-digits-validate.html -simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { - uint64_t val; - // this can read up to 7 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); - std::memcpy(&val, chars, 8); - // a branchy method might be faster: - // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) - // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == - // 0x3030303030303030); - return (((val & 0xF0F0F0F0F0F0F0F0) | - (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == - 0x3333333333333333); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } - -template -SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_inline bool parse_digit(const uint8_t c, I &i) { - const uint8_t digit = static_cast(c - '0'); - if (digit > 9) { - return false; +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); } - // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication - i = 10 * i + digit; // might overflow, we will handle the overflow later - return true; } +#endif -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. - const uint8_t *const first_after_period = p; - -#ifdef SIMDJSON_SWAR_NUMBER_PARSING -#if SIMDJSON_SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); } -#endif // SIMDJSON_SWAR_NUMBER_PARSING -#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING - // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) - if (parse_digit(*p, i)) { ++p; } - while (parse_digit(*p, i)) { p++; } - exponent = first_after_period - p; - // Decimal without digits (123.) is illegal - if (exponent == 0) { - return INVALID_NUMBER(src); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); } - return SUCCESS; } +#endif +}}} // namespace simdjson::fallback::ondemand -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { - // Exp Sign: -123.456e[-]78 - bool neg_exp = ('-' == *p); - if (neg_exp || '+' == *p) { p++; } // Skip + as well - - // Exponent: -123.456e-[78] - auto start_exp = p; - int64_t exp_number = 0; - while (parse_digit(*p, exp_number)) { ++p; } - // It is possible for parse_digit to overflow. - // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. - // Thus we *must* check for possible overflow before we negate exp_number. +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for fallback */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H - // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into - // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may - // not oblige and may, in fact, generate two distinct paths in any case. It might be - // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off - // instructions for a simdjson_likely branch, an unconclusive gain. +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // If there were no digits, it's an error. - if (simdjson_unlikely(p == start_exp)) { - return INVALID_NUMBER(src); - } - // We have a valid positive exponent in exp_number at this point, except that - // it may have overflowed. +namespace simdjson { +namespace fallback { +namespace ondemand { - // If there were more than 18 digits, we may have overflowed the integer. We have to do - // something!!!! - if (simdjson_unlikely(p > start_exp+18)) { - // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow - while (*start_exp == '0') { start_exp++; } - // 19 digits could overflow int64_t and is kind of absurd anyway. We don't - // support exponents smaller than -999,999,999,999,999,999 and bigger - // than 999,999,999,999,999,999. - // We can truncate. - // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before - // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could - // truncate at 324. - // Note that there is no reason to fail per se at this point in time. - // E.g., 0e999999999999999999999 is a fine number. - if (p > start_exp+18) { exp_number = 999999999999999999; } - } - // At this point, we know that exp_number is a sane, positive, signed integer. - // It is <= 999,999,999,999,999,999. As long as 'exponent' is in - // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' - // is bounded in magnitude by the size of the JSON input, we are fine in this universe. - // To sum it up: the next line should never overflow. - exponent += (neg_exp ? -exp_number : exp_number); - return SUCCESS; +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ } -simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { ++start; } - // we over-decrement by one when there is a '.' - return digit_count - size_t(start - start_digits); +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); } -} // unnamed namespace -/** @private */ -static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double* answer) { - if (parse_float_fallback(src, answer)) { - return SUCCESS; - } - return INVALID_NUMBER(src); +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; } -/** @private */ -template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon in practice. - // - // 9999999999999999999 < 2**64 so we can accommodate 19 digits. - // If we have a decimal separator, then digit_count - 1 is the number of digits, but we - // may not have a decimal separator! - if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { - // Ok, chances are good that we had an overflow! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - // NOTE: We do not pass a reference to the to slow_float_parsing. If we passed our writer - // reference to it, it would force it to be stored in memory, preventing the compiler from - // picking it apart and putting into registers. i.e. if we pass it as reference, - // it gets slow. - double d; - error_code error = slow_float_parsing(src, &d); - writer.append_double(d); - return error; - } - // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other - // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 - // To future reader: we'd love if someone found a better way, or at least could explain this result! - if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { - // - // Important: smallest_power is such that it leads to a zero value. - // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero - // so something x 10^-343 goes to zero, but not so with something x 10^-342. - static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); - // - if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero - WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); - return SUCCESS; - } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. - return INVALID_NUMBER(src); - } - } - double d; - if (!compute_float_64(exponent, i, negative, d)) { - // we are almost never going to get here. - if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } - } - WRITE_DOUBLE(d, src, writer); - return SUCCESS; +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; } -// for performance analysis, it is sometimes useful to skip parsing -#ifdef SIMDJSON_SKIPNUMBERPARSING +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? + *(position+1) - *(position) + : *(position+2) - *(position); +} +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; +} +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); +} -template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { - writer.append_s64(0); // always write zero - return SUCCESS; // always succeeds +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; } -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } -#else +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; +} +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; +} -// parse the number at src -// define JSON_TEST_NUMBERS for unit testing -// -// It is assumed that the number is followed by a structural ({,},],[) character -// or a white space character. If that is not the case (e.g., when the JSON -// document is made of a single number), then it is necessary to copy the -// content and append a space before calling this function. -// -// Our objective is accurate parsing (ULP of 0) at high speed. -template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +} // namespace ondemand +} // namespace fallback +} // namespace simdjson - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); +namespace simdjson { - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } +} // namespace simdjson - // - // Handle floats if there is a . or e (or both) - // - int64_t exponent = 0; - bool is_float = false; - if ('.' == *p) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); - digit_count = int(p - start_digits); // used later to guard against overflows - } - if (('e' == *p) || ('E' == *p)) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_exponent(src, p, exponent) ); - } - if (is_float) { - const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); - SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); - if (dirty_end) { return INVALID_NUMBER(src); } - return SUCCESS; - } +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/token_iterator-inl.h for fallback */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H - // The longest negative 64-bit number is 19 digits. - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - size_t longest_digit_count = negative ? 19 : 20; - if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } - WRITE_INTEGER(~i+1, src, writer); - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + end_container(); + return false; } + return true; +} - // Write unsigned if it doesn't fit in a signed integer. - if (i > uint64_t(INT64_MAX)) { - WRITE_UNSIGNED(i, src, writer); - } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } } - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; } -// Inlineable functions -namespace { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} -// This table can be used to characterize the final character of an integer -// string. For JSON structural character and allowable white space characters, -// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise -// we return NUMBER_ERROR. -// Optimization note: we could easily reduce the size of the table by half (to 128) -// at the cost of an extra branch. -// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): -static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} -const uint8_t integer_string_finisher[256] = { - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR}; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { - const uint8_t *p = src; + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; // - // Parse the integer part. + // Initially, the object can be in one of a few different places: // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } } - return i; + // If the loop ended, we're out of fields to look at. + return false; } +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; -// Parse any number from 0 to 18,446,744,073,709,551,615 -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { - const uint8_t *p = src; + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); + + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: // - // Parse the integer part. + // 1. At the first key: // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif } - return i; -} - -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { - const uint8_t *p = src + 1; + // After initial processing, we will be in one of two states: // - // Parse the integer part. + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - // Note: we use src[1] and not src[0] because src[0] is the quote character in this - // instance. - if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } - return i; -} + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. + // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. + } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; } +SIMDJSON_POP_DISABLE_WARNINGS -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { - // - // Check for minus sign - // - if(src == src_end) { return NUMBER_ERROR; } - bool negative = (*src == '-'); - const uint8_t *p = src + uint8_t(negative); - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); } -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += uint8_t(negative) + 1; +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = src; - uint64_t i = 0; - while (parse_digit(*src, i)) { src++; } + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; +} - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(src - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*src)) { - // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(*src != '"') { return NUMBER_ERROR; } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); } -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += uint8_t(negative); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); +} - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } +inline std::string value_iterator::to_string() const noexcept { + auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; +} - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; + } + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; +} - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = p-start_digits > 19; +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); } - } else { - overflow = p-src > 19; } + return SUCCESS; +} - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); +} - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); - exponent += exp_neg ? 0-exp : exp; + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); } +} - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; +} - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_string(allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. } - if (!parse_float_fallback(src - uint8_t(negative), &d)) { + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} + +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); return NUMBER_ERROR; } - return d; + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; +} +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_root_string(check_trailing, allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } -simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { - return (*src == '-'); + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } -simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += uint8_t(negative); - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } - return false; + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; } -simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += uint8_t(negative); - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { - // We have an integer. - // If the number is negative and valid, it must be a signed integer. - if(negative) { return number_type::signed_integer; } - // We want values larger or equal to 9223372036854775808 to be unsigned - // integers, and the other values to be signed integers. - int digit_count = int(p - src); - if(digit_count >= 19) { - const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); - if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { - return number_type::unsigned_integer; - } - } - return number_type::signed_integer; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("bool"); + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; +} + +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } else if (json[0] == 'n') { + return incorrect_type_error("Not a null but starts with n"); } - // Hopefully, we have 'e' or 'E' or '.'. - return number_type::floating_point_number; + return result; } -// Never read at src_end or beyond -simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { - if(src == src_end) { return NUMBER_ERROR; } - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += uint8_t(negative); +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - if(p == src_end) { return NUMBER_ERROR; } - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while ((p != src_end) && parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + return _json_iter->skip_child(depth()); +} - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely((p != src_end) && (*p == '.'))) { - p++; - const uint8_t *start_decimal_digits = p; - if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS - // - // Parse the exponent - // - if ((p != src_end) && (*p == 'e' || *p == 'E')) { - p++; - if(p == src_end) { return NUMBER_ERROR; } - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while ((p != src_end) && parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} - exponent += exp_neg ? 0-exp : exp; - } +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} - if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { - return NUMBER_ERROR; - } - return d; +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); } -simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += uint8_t(negative) + 1; +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return peek_start(); } - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = p-start_digits > 19; - } + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } } else { - overflow = p-src > 19; + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! + */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); } - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - exponent += exp_neg ? 0-exp : exp; - } + return SUCCESS; +} - if (*p != '"') { return NUMBER_ERROR; } - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src - uint8_t(negative), &d)) { - return NUMBER_ERROR; - } - return d; + assert_at_root(); + return _json_iter->peek(); } +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } -} // unnamed namespace -#endif // SIMDJSON_SKIPNUMBERPARSING + assert_at_non_root_start(); + return _json_iter->peek(); +} -} // namespace numberparsing +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } -inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { - switch (type) { - case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; - case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; - case number_type::floating_point_number: out << "floating-point number (binary64)"; break; - default: SIMDJSON_UNREACHABLE(); - } - return out; + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } -} // namespace westmere -} // namespace simdjson + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} -#endif // SIMDJSON_GENERIC_NUMBERPARSING_H -/* end file simdjson/generic/numberparsing.h for westmere */ +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; +} -/* including simdjson/generic/implementation_simdjson_result_base-inl.h for westmere: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} -namespace simdjson { -namespace westmere { +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; +} -// -// internal::implementation_simdjson_result_base inline implementation -// +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} -template -simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { - error = this->second; - if (!error) { - value = std::forward>(*this).first; - } +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } -template -simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { - error_code error; - std::forward>(*this).tie(value, error); - return error; +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } -template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { - return this->second; +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); } -#if SIMDJSON_EXCEPTIONS +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); +} -template -simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return this->first; +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); } -template -simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { - return std::forward>(*this).take_value(); +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); } -template -simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(this->first); +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); } -template -simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { - return std::forward>(*this).take_value(); +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); } -#endif // SIMDJSON_EXCEPTIONS +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} -template -simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { - return this->first; +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); } -template -simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { - return this->first; +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; } -template -simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { - return std::forward(this->first); +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } } -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept - : first{std::forward(value)}, second{error} {} -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept - : implementation_simdjson_result_base(T{}, error) {} -template -simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept - : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} -} // namespace westmere -} // namespace simdjson +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} -#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H -/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for westmere */ -/* end file simdjson/generic/amalgamated.h for westmere */ -/* including simdjson/westmere/end.h: #include "simdjson/westmere/end.h" */ -/* begin file simdjson/westmere/end.h */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} -#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE -SIMDJSON_UNTARGET_REGION -#endif +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} -/* undefining SIMDJSON_IMPLEMENTATION from "westmere" */ -#undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/westmere/end.h */ +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} -#endif // SIMDJSON_WESTMERE_H -/* end file simdjson/westmere.h */ -#else -#error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION -#endif +} // namespace ondemand +} // namespace fallback +} // namespace simdjson -/* undefining SIMDJSON_CONDITIONAL_INCLUDE */ -#undef SIMDJSON_CONDITIONAL_INCLUDE +namespace simdjson { -#endif // SIMDJSON_BUILTIN_H -/* end file simdjson/builtin.h */ -/* skipped duplicate #include "simdjson/builtin/base.h" */ +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -/* including simdjson/generic/ondemand/dependencies.h: #include "simdjson/generic/ondemand/dependencies.h" */ -/* begin file simdjson/generic/ondemand/dependencies.h */ -#ifdef SIMDJSON_CONDITIONAL_INCLUDE -#error simdjson/generic/ondemand/dependencies.h must be included before defining SIMDJSON_CONDITIONAL_INCLUDE! -#endif +} // namespace simdjson -#ifndef SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H -#define SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/value_iterator-inl.h for fallback */ -// Internal headers needed for ondemand generics. -// All includes not under simdjson/generic/ondemand must be here! -// Otherwise, amalgamation will fail. -/* skipped duplicate #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ -/* skipped duplicate #include "simdjson/implementation.h" */ -/* skipped duplicate #include "simdjson/padded_string.h" */ -/* skipped duplicate #include "simdjson/padded_string_view.h" */ -/* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ -#endif // SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H -/* end file simdjson/generic/ondemand/dependencies.h */ +/* end file simdjson/generic/ondemand/amalgamated.h for fallback */ +/* including simdjson/fallback/end.h: #include "simdjson/fallback/end.h" */ +/* begin file simdjson/fallback/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -/* defining SIMDJSON_CONDITIONAL_INCLUDE */ -#define SIMDJSON_CONDITIONAL_INCLUDE +/* undefining SIMDJSON_IMPLEMENTATION from "fallback" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/fallback/end.h */ -#if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) -/* including simdjson/arm64/ondemand.h: #include "simdjson/arm64/ondemand.h" */ -/* begin file simdjson/arm64/ondemand.h */ -#ifndef SIMDJSON_ARM64_ONDEMAND_H -#define SIMDJSON_ARM64_ONDEMAND_H +#endif // SIMDJSON_FALLBACK_ONDEMAND_H +/* end file simdjson/fallback/ondemand.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) +/* including simdjson/haswell/ondemand.h: #include "simdjson/haswell/ondemand.h" */ +/* begin file simdjson/haswell/ondemand.h */ +#ifndef SIMDJSON_HASWELL_ONDEMAND_H +#define SIMDJSON_HASWELL_ONDEMAND_H -/* including simdjson/arm64/begin.h: #include "simdjson/arm64/begin.h" */ -/* begin file simdjson/arm64/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "arm64" */ -#define SIMDJSON_IMPLEMENTATION arm64 -/* including simdjson/arm64/base.h: #include "simdjson/arm64/base.h" */ -/* begin file simdjson/arm64/base.h */ -#ifndef SIMDJSON_ARM64_BASE_H -#define SIMDJSON_ARM64_BASE_H +/* including simdjson/haswell/begin.h: #include "simdjson/haswell/begin.h" */ +/* begin file simdjson/haswell/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "haswell" */ +#define SIMDJSON_IMPLEMENTATION haswell + +/* including simdjson/haswell/base.h: #include "simdjson/haswell/base.h" */ +/* begin file simdjson/haswell/base.h */ +#ifndef SIMDJSON_HASWELL_BASE_H +#define SIMDJSON_HASWELL_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL namespace simdjson { /** - * Implementation for NEON (ARMv8). + * Implementation for Haswell (Intel AVX2). */ -namespace arm64 { +namespace haswell { class implementation; @@ -25940,40 +53783,90 @@ template struct simd8x64; } // namespace simd } // unnamed namespace -} // namespace arm64 +} // namespace haswell } // namespace simdjson -#endif // SIMDJSON_ARM64_BASE_H -/* end file simdjson/arm64/base.h */ -/* including simdjson/arm64/intrinsics.h: #include "simdjson/arm64/intrinsics.h" */ -/* begin file simdjson/arm64/intrinsics.h */ -#ifndef SIMDJSON_ARM64_INTRINSICS_H -#define SIMDJSON_ARM64_INTRINSICS_H +#endif // SIMDJSON_HASWELL_BASE_H +/* end file simdjson/haswell/base.h */ +/* including simdjson/haswell/intrinsics.h: #include "simdjson/haswell/intrinsics.h" */ +/* begin file simdjson/haswell/intrinsics.h */ +#ifndef SIMDJSON_HASWELL_INTRINSICS_H +#define SIMDJSON_HASWELL_INTRINSICS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// This should be the correct header whether -// you use visual studio or other compilers. -#include +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO -static_assert(sizeof(uint8x16_t) <= simdjson::SIMDJSON_PADDING, "insufficient padding for arm64"); +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO -#endif // SIMDJSON_ARM64_INTRINSICS_H -/* end file simdjson/arm64/intrinsics.h */ -/* including simdjson/arm64/bitmanipulation.h: #include "simdjson/arm64/bitmanipulation.h" */ -/* begin file simdjson/arm64/bitmanipulation.h */ -#ifndef SIMDJSON_ARM64_BITMANIPULATION_H -#define SIMDJSON_ARM64_BITMANIPULATION_H +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); + +#endif // SIMDJSON_HASWELL_INTRINSICS_H +/* end file simdjson/haswell/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +// We enable bmi2 only if LLVM/clang is used, because GCC may not +// make good use of it. See https://github.com/simdjson/simdjson/pull/2243 +#if defined(__clang__) +SIMDJSON_TARGET_REGION("avx2,bmi,bmi2,pclmul,lzcnt,popcnt") +#else +SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") +#endif +#endif + +/* including simdjson/haswell/bitmanipulation.h: #include "simdjson/haswell/bitmanipulation.h" */ +/* begin file simdjson/haswell/bitmanipulation.h */ +#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H +#define SIMDJSON_HASWELL_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmask.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace { // We sometimes call trailing_zero on inputs that are zero, @@ -25985,80 +53878,44 @@ SIMDJSON_NO_SANITIZE_UNDEFINED // See issue https://github.com/simdjson/simdjson/issues/1965 SIMDJSON_NO_SANITIZE_MEMORY simdjson_inline int trailing_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); #else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// return __builtin_ctzll(input_num); #endif // SIMDJSON_REGULAR_VISUAL_STUDIO } /* result might be undefined when input_num is zero */ simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return input_num & (input_num-1); + return _blsr_u64(input_num); } /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif// SIMDJSON_REGULAR_VISUAL_STUDIO -} - -/* result might be undefined when input_num is zero */ -simdjson_inline int count_ones(uint64_t input_num) { - return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); + return int(_lzcnt_u64(input_num)); } - -#if defined(__GNUC__) // catches clang and gcc -/** - * ARM has a fast 64-bit "bit reversal function" that is handy. However, - * it is not generally available as an intrinsic function under Visual - * Studio (though this might be changing). Even under clang/gcc, we - * apparently need to invoke inline assembly. - */ -/* - * We use SIMDJSON_PREFER_REVERSE_BITS as a hint that algorithms that - * work well with bit reversal may use it. - */ -#define SIMDJSON_PREFER_REVERSE_BITS 1 - -/* reverse the bits */ -simdjson_inline uint64_t reverse_bits(uint64_t input_num) { - uint64_t rev_bits; - __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); - return rev_bits; +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores } - -/** - * Flips bit at index 63 - lz. Thus if you have 'leading_zeroes' leading zeroes, - * then this will set to zero the leading bit. It is possible for leading_zeroes to be - * greating or equal to 63 in which case we trigger undefined behavior, but the output - * of such undefined behavior is never used. - **/ -SIMDJSON_NO_SANITIZE_UNDEFINED -simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { - return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); } - #endif -simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - *result = value1 + value2; - return *result < value1; +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); #else return __builtin_uaddll_overflow(value1, value2, reinterpret_cast(result)); @@ -26066,22 +53923,23 @@ simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *re } } // unnamed namespace -} // namespace arm64 +} // namespace haswell } // namespace simdjson -#endif // SIMDJSON_ARM64_BITMANIPULATION_H -/* end file simdjson/arm64/bitmanipulation.h */ -/* including simdjson/arm64/bitmask.h: #include "simdjson/arm64/bitmask.h" */ -/* begin file simdjson/arm64/bitmask.h */ -#ifndef SIMDJSON_ARM64_BITMASK_H -#define SIMDJSON_ARM64_BITMASK_H +#endif // SIMDJSON_HASWELL_BITMANIPULATION_H +/* end file simdjson/haswell/bitmanipulation.h */ +/* including simdjson/haswell/bitmask.h: #include "simdjson/haswell/bitmask.h" */ +/* begin file simdjson/haswell/bitmask.h */ +#ifndef SIMDJSON_HASWELL_BITMASK_H +#define SIMDJSON_HASWELL_BITMASK_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace { // @@ -26089,78 +53947,68 @@ namespace { // // For example, prefix_xor(00100100) == 00011100 // -simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { - ///////////// - // We could do this with PMULL, but it is apparently slow. - // - //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension - //return vmull_p64(-1ULL, bitmask); - //#else - // Analysis by @sebpop: - // When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out - // in between other vector code, so effectively the extra cycles of the sequence do not matter - // because the GPR units are idle otherwise and the critical path is on the FP side. - // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) - // and FP->GPR (2 cycles on N1 and 5 cycles on A72.) - /////////// - bitmask ^= bitmask << 1; - bitmask ^= bitmask << 2; - bitmask ^= bitmask << 4; - bitmask ^= bitmask << 8; - bitmask ^= bitmask << 16; - bitmask ^= bitmask << 32; - return bitmask; +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); } } // unnamed namespace -} // namespace arm64 +} // namespace haswell } // namespace simdjson -#endif -/* end file simdjson/arm64/bitmask.h */ -/* including simdjson/arm64/numberparsing_defs.h: #include "simdjson/arm64/numberparsing_defs.h" */ -/* begin file simdjson/arm64/numberparsing_defs.h */ -#ifndef SIMDJSON_ARM64_NUMBERPARSING_DEFS_H -#define SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +#endif // SIMDJSON_HASWELL_BITMASK_H +/* end file simdjson/haswell/bitmask.h */ +/* including simdjson/haswell/numberparsing_defs.h: #include "simdjson/haswell/numberparsing_defs.h" */ +/* begin file simdjson/haswell/numberparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +#define SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include - -#if _M_ARM64 -// __umulh requires intrin.h -#include -#endif // _M_ARM64 +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace numberparsing { -// we don't have SSE, so let us use a scalar function -// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ /** @private */ static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - uint64_t val; - std::memcpy(&val, chars, sizeof(uint64_t)); - val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; - val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; - return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest } +/** @private */ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { internal::value128 answer; #if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 +#if SIMDJSON_IS_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); answer.low = value1 * value2; #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 +#endif // SIMDJSON_IS_ARM64 #else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); @@ -26170,302 +54018,184 @@ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t } } // namespace numberparsing -} // namespace arm64 +} // namespace haswell } // namespace simdjson #define SIMDJSON_SWAR_NUMBER_PARSING 1 -#endif // SIMDJSON_ARM64_NUMBERPARSING_DEFS_H -/* end file simdjson/arm64/numberparsing_defs.h */ -/* including simdjson/arm64/simd.h: #include "simdjson/arm64/simd.h" */ -/* begin file simdjson/arm64/simd.h */ -#ifndef SIMDJSON_ARM64_SIMD_H -#define SIMDJSON_ARM64_SIMD_H +#endif // SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +/* end file simdjson/haswell/numberparsing_defs.h */ +/* including simdjson/haswell/simd.h: #include "simdjson/haswell/simd.h" */ +/* begin file simdjson/haswell/simd.h */ +#ifndef SIMDJSON_HASWELL_SIMD_H +#define SIMDJSON_HASWELL_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace { namespace simd { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO -namespace { -// Start of private section with Visual Studio workaround - + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m256i value; -#ifndef simdjson_make_uint8x16_t -#define simdjson_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ - x13, x14, x15, x16) \ - ([=]() { \ - uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ - x9, x10, x11, x12, x13, x14, x15, x16}; \ - return vld1q_u8(array); \ - }()) -#endif -#ifndef simdjson_make_int8x16_t -#define simdjson_make_int8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ - x13, x14, x15, x16) \ - ([=]() { \ - int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ - x9, x10, x11, x12, x13, x14, x15, x16}; \ - return vld1q_s8(array); \ - }()) -#endif + // Zero constructor + simdjson_inline base() : value{__m256i()} {} -#ifndef simdjson_make_uint8x8_t -#define simdjson_make_uint8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ - ([=]() { \ - uint8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ - return vld1_u8(array); \ - }()) -#endif -#ifndef simdjson_make_int8x8_t -#define simdjson_make_int8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ - ([=]() { \ - int8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ - return vld1_s8(array); \ - }()) -#endif -#ifndef simdjson_make_uint16x8_t -#define simdjson_make_uint16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ - ([=]() { \ - uint16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ - return vld1q_u16(array); \ - }()) -#endif -#ifndef simdjson_make_int16x8_t -#define simdjson_make_int16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ - ([=]() { \ - int16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ - return vld1q_s16(array); \ - }()) -#endif + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} -// End of private section with Visual Studio workaround -} // namespace -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + // Forward-declared so they can be used by splat and friends. template struct simd8; - // - // Base class of simd8 and simd8, both of which use uint8x16_t internally. - // template> - struct base_u8 { - uint8x16_t value; - static const int SIZE = sizeof(value); + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; - // Conversion from/to SIMD register - simdjson_inline base_u8(const uint8x16_t _value) : value(_value) {} - simdjson_inline operator const uint8x16_t&() const { return this->value; } - simdjson_inline operator uint8x16_t&() { return this->value; } + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} - // Bit operations - simdjson_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } - simdjson_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } - simdjson_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } - simdjson_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } - simdjson_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } - friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } + static const int SIZE = sizeof(base::value); template simdjson_inline simd8 prev(const simd8 prev_chunk) const { - return vextq_u8(prev_chunk, *this, 16 - N); + return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); } }; // SIMD byte mask type (returned by things like eq and gt) template<> - struct simd8: base_u8 { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - - static simdjson_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } - simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} - // False constructor - simdjson_inline simd8() : simd8(vdupq_n_u8(0)) {} + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} // Splat constructor - simdjson_inline simd8(bool _value) : simd8(splat(_value)) {} + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} - // We return uint32_t instead of uint16_t because that seems to be more efficient for most - // purposes (cutting it down to uint16_t costs performance in some compilers). - simdjson_inline uint32_t to_bitmask() const { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - const uint8x16_t bit_mask = simdjson_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); -#else - const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; -#endif - auto minput = *this & bit_mask; - uint8x16_t tmp = vpaddq_u8(minput, minput); - tmp = vpaddq_u8(tmp, tmp); - tmp = vpaddq_u8(tmp, tmp); - return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); - } - simdjson_inline bool any() const { return vmaxvq_u8(*this) != 0; } + simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } }; - // Unsigned bytes - template<> - struct simd8: base_u8 { - static simdjson_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } - static simdjson_inline uint8x16_t zero() { return vdupq_n_u8(0); } - static simdjson_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } - - simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} - // Zero constructor - simdjson_inline simd8() : simd8(zero()) {} - // Array constructor - simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} - // Splat constructor - simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Member-by-member initialization -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - simdjson_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) : simd8(simdjson_make_uint8x16_t( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} -#else - simdjson_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) : simd8(uint8x16_t{ - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - }) {} -#endif - + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } + static simdjson_inline simd8 load(const T values[32]) { + return _mm256_loadu_si256(reinterpret_cast(values)); + } // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 ) { - return simd8( + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } - // Store to array - simdjson_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } - - // Saturated math - simdjson_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } - simdjson_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } - - // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } - simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } - simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } - - // Order-specific operations - simdjson_inline uint8_t max_val() const { return vmaxvq_u8(*this); } - simdjson_inline uint8_t min_val() const { return vminvq_u8(*this); } - simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return vminq_u8(*this, other); } - simdjson_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } - simdjson_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } - simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, other); } - simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } - // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. - simdjson_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } - // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. - simdjson_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} - // Bit-specific operations - simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } - simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } - simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } - template - simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } - template - simdjson_inline simd8 shl() const { return vshlq_n_u8(*this, N); } + // Store to array + simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return lookup_table.apply_lookup_16_to(*this); + return _mm256_shuffle_epi8(lookup_table, *this); } - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes // get written. // Design consideration: it seems like a function with the - // signature simd8 compress(uint16_t mask) would be + // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. template - simdjson_inline void compress(uint16_t mask, L * output) const { + simdjson_inline void compress(uint32_t mask, L * output) const { using internal::thintable_epi8; using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes + // we do it in four steps, first 8 bytes and then second 8 bytes... uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... // next line just loads the 64-bit values thintable_epi8[mask1] and // thintable_epi8[mask2] into a 128-bit register, using only // two instructions on most compilers. - uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; - uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); - // we increment by 0x08 the second half of the mask -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - uint8x16_t inc = simdjson_make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); -#else - uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; -#endif - shufmask = vaddq_u8(shufmask, inc); + __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], + thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask and so forth + shufmask = + _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, + 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); // this is the version "nearly pruned" - uint8x16_t pruned = vqtbl1q_u8(*this, shufmask); - // we still need to put the two halves together. - // we compute the popcount of the first half: + __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: int pop1 = BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - uint8x16_t compactmask = vld1q_u8(reinterpret_cast(pshufb_combine_table + pop1 * 8)); - uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); - vst1q_u8(reinterpret_cast(output), answer); - } + int pop3 = BitsSetTable256mul2[mask3]; - // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a - // bitset) to output1, then those corresponding to a 0 in the high half to output2. - template - simdjson_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { - using internal::thintable_epi8; - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); - uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); - // we increment by 0x08 the second half of the mask -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - uint8x8_t inc = simdjson_make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); -#else - uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; -#endif - compactmask2 = vadd_u8(compactmask2, inc); - // store each result (with the second store possibly overlapping the first) - vst1_u8((uint8_t*)output1, vqtbl1_u8(*this, compactmask1)); - vst1_u8((uint8_t*)output2, vqtbl1_u8(*this, compactmask2)); + // then load the corresponding mask + // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. + __m256i v256 = _mm256_castsi128_si256( + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); + __m256i compactmask = _mm256_insertf128_si256(v256, + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); + __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); + // We just need to write out the result. + // This is the tricky bit that is hard to do + // if we want to return a SIMD register, since there + // is no single-instruction approach to recombine + // the two 128-bit lanes with an offset. + __m128i v128; + v128 = _mm256_castsi256_si128(almostthere); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); + v128 = _mm256_extractf128_si256(almostthere, 1); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); } template @@ -26481,181 +54211,175 @@ namespace { replace12, replace13, replace14, replace15 )); } - - template - simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { - return vqtbl1q_u8(*this, simd8(original)); - } }; // Signed bytes template<> - struct simd8 { - int8x16_t value; - - static simdjson_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } - static simdjson_inline simd8 zero() { return vdupq_n_s8(0); } - static simdjson_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } - - // Conversion from/to SIMD register - simdjson_inline simd8(const int8x16_t _value) : value{_value} {} - simdjson_inline operator const int8x16_t&() const { return this->value; } - simdjson_inline operator int8x16_t&() { return this->value; } - - // Zero constructor - simdjson_inline simd8() : simd8(zero()) {} + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} // Member-by-member initialization -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) : simd8(simdjson_make_int8x16_t( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8(_mm256_setr_epi8( v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 )) {} -#else - simdjson_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) : simd8(int8x16_t{ - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - }) {} -#endif // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } - // Store to array - simdjson_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } - - // Explicit conversion to/from unsigned - // - // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type. - // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14 - // and relatively ugly and hard to read. -#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO - simdjson_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} -#endif - simdjson_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } - - // Math - simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } - simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } - simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } - // Order-sensitive comparisons - simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } - simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(*this, other); } - simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } - simdjson_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } + }; - template - simdjson_inline simd8 prev(const simd8 prev_chunk) const { - return vextq_s8(prev_chunk, *this, 16 - N); + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); } - // Perform a lookup assuming no value is larger than 16 - template - simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return lookup_table.apply_lookup_16_to(*this); - } - template - simdjson_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, replace15 - )); - } + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } - template - simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { - return vqtbl1q_s8(*this, simd8(original)); - } + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } }; template struct simd8x64 { static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block."); + static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); const simd8 chunks[NUM_CHUNKS]; simd8x64(const simd8x64& o) = delete; // no copy allowed simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + this->chunks[0].compress(mask1, output); + this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); + return 64 - count_ones(mask); + } simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr+sizeof(simd8)*0); this->chunks[1].store(ptr+sizeof(simd8)*1); - this->chunks[2].store(ptr+sizeof(simd8)*2); - this->chunks[3].store(ptr+sizeof(simd8)*3); } - simdjson_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + simdjson_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); } - - simdjson_inline uint64_t compress(uint64_t mask, T * output) const { - uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); - // compute the prefix sum of the popcounts of each byte - uint64_t offsets = popcounts * 0x0101010101010101; - this->chunks[0].compress_halves(uint16_t(mask), output, &output[popcounts & 0xFF]); - this->chunks[1].compress_halves(uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF], &output[(offsets >> 16) & 0xFF]); - this->chunks[2].compress_halves(uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF], &output[(offsets >> 32) & 0xFF]); - this->chunks[3].compress_halves(uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF], &output[(offsets >> 48) & 0xFF]); - return offsets >> 56; + simdjson_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; } - simdjson_inline uint64_t to_bitmask() const { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - const uint8x16_t bit_mask = simdjson_make_uint8x16_t( - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask ); -#else - const uint8x16_t bit_mask = { - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 - }; -#endif - // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one. - uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask); - uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask); - sum0 = vpaddq_u8(sum0, sum1); - sum0 = vpaddq_u8(sum0, sum0); - return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); } simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] == mask, - this->chunks[1] == mask, - this->chunks[2] == mask, - this->chunks[3] == mask + this->chunks[1] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] ).to_bitmask(); } @@ -26663,33 +54387,32 @@ namespace { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] <= mask, - this->chunks[1] <= mask, - this->chunks[2] <= mask, - this->chunks[3] <= mask + this->chunks[1] <= mask ).to_bitmask(); } }; // struct simd8x64 } // namespace simd + } // unnamed namespace -} // namespace arm64 +} // namespace haswell } // namespace simdjson -#endif // SIMDJSON_ARM64_SIMD_H -/* end file simdjson/arm64/simd.h */ -/* including simdjson/arm64/stringparsing_defs.h: #include "simdjson/arm64/stringparsing_defs.h" */ -/* begin file simdjson/arm64/stringparsing_defs.h */ -#ifndef SIMDJSON_ARM64_STRINGPARSING_DEFS_H -#define SIMDJSON_ARM64_STRINGPARSING_DEFS_H +#endif // SIMDJSON_HASWELL_SIMD_H +/* end file simdjson/haswell/simd.h */ +/* including simdjson/haswell/stringparsing_defs.h: #include "simdjson/haswell/stringparsing_defs.h" */ +/* begin file simdjson/haswell/stringparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +#define SIMDJSON_HASWELL_STRINGPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/simd.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace { using namespace simd; @@ -26701,7 +54424,7 @@ struct backslash_and_quote { simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } @@ -26710,41 +54433,34 @@ struct backslash_and_quote { }; // struct backslash_and_quote simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // this can read up to 31 bytes beyond the buffer size, but we require + // this can read up to 15 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); - simd8 v0(src); - simd8 v1(src + sizeof(v0)); - v0.store(dst); - v1.store(dst + sizeof(v0)); - - // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; therefore, we - // smash them together into a 64-byte mask and get the bitmask from there. - uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); return { - uint32_t(bs_and_quote), // bs_bits - uint32_t(bs_and_quote >> 32) // quote_bits + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits }; } } // unnamed namespace -} // namespace arm64 +} // namespace haswell } // namespace simdjson -#endif // SIMDJSON_ARM64_STRINGPARSING_DEFS_H -/* end file simdjson/arm64/stringparsing_defs.h */ - -#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 -/* end file simdjson/arm64/begin.h */ -/* including simdjson/generic/ondemand/amalgamated.h for arm64: #include "simdjson/generic/ondemand/amalgamated.h" */ -/* begin file simdjson/generic/ondemand/amalgamated.h for arm64 */ +#endif // SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +/* end file simdjson/haswell/stringparsing_defs.h */ +/* end file simdjson/haswell/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for haswell: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for haswell */ #if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) #error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! #endif // Stuff other things depend on -/* including simdjson/generic/ondemand/base.h for arm64: #include "simdjson/generic/ondemand/base.h" */ -/* begin file simdjson/generic/ondemand/base.h for arm64 */ +/* including simdjson/generic/ondemand/base.h for haswell: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -26753,7 +54469,7 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { /** * A fast, simple, DOM-like interface that parses JSON as you use it. * @@ -26764,8 +54480,8 @@ namespace ondemand { /** Represents the depth of a JSON value (number of nested arrays/objects). */ using depth_t = int32_t; -/** @copydoc simdjson::arm64::number_type */ -using number_type = simdjson::arm64::number_type; +/** @copydoc simdjson::haswell::number_type */ +using number_type = simdjson::haswell::number_type; /** @private Position in the JSON buffer indexes */ using token_position = const uint32_t *; @@ -26788,13 +54504,139 @@ class value; class value_iterator; } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H -/* end file simdjson/generic/ondemand/base.h for arm64 */ -/* including simdjson/generic/ondemand/value_iterator.h for arm64: #include "simdjson/generic/ondemand/value_iterator.h" */ -/* begin file simdjson/generic/ondemand/value_iterator.h for arm64 */ +/* end file simdjson/generic/ondemand/base.h for haswell */ +/* including simdjson/generic/ondemand/deserialize.h for haswell: #include "simdjson/generic/ondemand/deserialize.h" */ +/* begin file simdjson/generic/ondemand/deserialize.h for haswell */ +#if SIMDJSON_SUPPORTS_DESERIALIZATION + +#ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_ONDEMAND_DESERIALIZE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +namespace simdjson { + +namespace tag_invoke_fn_ns { +void tag_invoke(); + +struct tag_invoke_fn { + template + requires requires(Tag tag, Args &&...args) { + tag_invoke(std::forward(tag), std::forward(args)...); + } + constexpr auto operator()(Tag tag, Args &&...args) const + noexcept(noexcept(tag_invoke(std::forward(tag), + std::forward(args)...))) + -> decltype(tag_invoke(std::forward(tag), + std::forward(args)...)) { + return tag_invoke(std::forward(tag), std::forward(args)...); + } +}; +} // namespace tag_invoke_fn_ns + +inline namespace tag_invoke_ns { +inline constexpr tag_invoke_fn_ns::tag_invoke_fn tag_invoke = {}; +} // namespace tag_invoke_ns + +template +concept tag_invocable = requires(Tag tag, Args... args) { + tag_invoke(std::forward(tag), std::forward(args)...); +}; + +template +concept nothrow_tag_invocable = + tag_invocable && requires(Tag tag, Args... args) { + { + tag_invoke(std::forward(tag), std::forward(args)...) + } noexcept; + }; + +template +using tag_invoke_result = + std::invoke_result; + +template +using tag_invoke_result_t = + std::invoke_result_t; + +template using tag_t = std::decay_t; + + +struct deserialize_tag; + +/// These types are deserializable in a built-in way +template struct is_builtin_deserializable : std::false_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; + +template +concept is_builtin_deserializable_v = is_builtin_deserializable::value; + +template +concept custom_deserializable = tag_invocable; + +template +concept deserializable = custom_deserializable || is_builtin_deserializable_v; + +template +concept nothrow_custom_deserializable = nothrow_tag_invocable; + +// built-in types are noexcept and if an error happens, the value simply gets ignored and the error is returned. +template +concept nothrow_deserializable = nothrow_custom_deserializable || is_builtin_deserializable_v; + +/// Deserialize Tag +inline constexpr struct deserialize_tag { + using value_type = haswell::ondemand::value; + using document_type = haswell::ondemand::document; + using document_reference_type = haswell::ondemand::document_reference; + + // Customization Point for value + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for document + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for document reference + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + +} deserialize{}; + +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_DESERIALIZE_H +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + +/* end file simdjson/generic/ondemand/deserialize.h for haswell */ +/* including simdjson/generic/ondemand/value_iterator.h for haswell: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -26804,7 +54646,7 @@ class value_iterator; /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { /** @@ -26922,7 +54764,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -26932,7 +54775,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -27054,7 +54898,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -27064,7 +54909,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -27172,6 +55018,7 @@ class value_iterator { simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; simdjson_inline const uint8_t *peek_start() const noexcept; simdjson_inline uint32_t peek_start_length() const noexcept; + simdjson_inline uint32_t peek_root_length() const noexcept; /** * The general idea of the advance_... methods and the peek_* methods @@ -27266,18 +55113,19 @@ class value_iterator { friend class object; friend class array; friend class value; + friend class field; }; // value_iterator } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public arm64::implementation_simdjson_result_base { +struct simdjson_result : public haswell::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(arm64::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(haswell::ondemand::value_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; @@ -27285,9 +55133,9 @@ struct simdjson_result : public arm64::implemen } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H -/* end file simdjson/generic/ondemand/value_iterator.h for arm64 */ -/* including simdjson/generic/ondemand/value.h for arm64: #include "simdjson/generic/ondemand/value.h" */ -/* begin file simdjson/generic/ondemand/value.h for arm64 */ +/* end file simdjson/generic/ondemand/value_iterator.h for haswell */ +/* including simdjson/generic/ondemand/value.h for haswell: #include "simdjson/generic/ondemand/value.h" */ +/* begin file simdjson/generic/ondemand/value.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -27295,12 +55143,15 @@ struct simdjson_result : public arm64::implemen /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include + namespace simdjson { -namespace arm64 { -namespace ondemand { +namespace haswell { +namespace ondemand { /** * An ephemeral JSON value returned during iteration. It is only valid for as long as you do * not access more data in the JSON document. @@ -27325,15 +55176,21 @@ class value { * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ - template simdjson_inline simdjson_result get() noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); + template + simdjson_inline simdjson_result get() +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; } + /** * Get this value as the given type. * @@ -27343,7 +55200,38 @@ class value { * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ - template simdjson_inline error_code get(T &out) noexcept; + template + simdjson_inline error_code get(T &out) +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + #if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif + } /** * Cast this JSON value to an array. @@ -27419,6 +55307,17 @@ class value { * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * + * In some instances, you may want to allow replacement of invalid Unicode sequences. + * You may do so by passing the allow_replacement parameter as true. In the following + * example, the string "431924697b\udff0L\u0001Y" is not valid Unicode. By passing true + * to get_string, we allow the replacement of the invalid Unicode sequences with the Unicode + * replacement character (U+FFFD). + * + * simdjson::ondemand::parser parser; + * auto json = R"({"deviceId":"431924697b\udff0L\u0001Y"})"_padded; + * simdjson::ondemand::document doc = parser.iterate(json); + * auto view = doc["deviceId"].get_string(true); + * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. @@ -27484,6 +55383,17 @@ class value { simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.). + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); /** * Cast this JSON value to an array. * @@ -27647,7 +55557,7 @@ class value { * that only one field is returned. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field as not there when they are not in order). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. @@ -27659,6 +55569,7 @@ class value { simdjson_inline simdjson_result operator[](std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_result operator[](int) noexcept = delete; /** * Get the type of this JSON value. It does not validate or consume the value. @@ -27683,6 +55594,13 @@ class value { * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the value is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; /** * Checks whether the value is a negative number. @@ -27715,10 +55633,12 @@ class value { * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have - * an integer greater or equal to 9223372036854775808 + * an integer greater or equal to 9223372036854775808. * get_number_type() is number_type::signed_integer if we have an - * integer that is less than 9223372036854775808 - * Otherwise, get_number_type() has value number_type::floating_point_number + * integer that is less than 9223372036854775808. + * get_number_type() is number_type::big_integer for integers that do not fit in 64 bits, + * in which case the digit_count is set to the length of the big integer string. + * Otherwise, get_number_type() has value number_type::floating_point_number. * * This function requires processing the number string, but it is expected * to be faster than get_number().get_number_type() because it is does not @@ -27745,6 +55665,8 @@ class value { * You can recover the value by calling number.get_uint64() and you * have that number.is_uint64() is true. * + * For integers that do not fit in 64 bits, the function returns BIGINT_ERROR error code. + * * Otherwise, number.get_number_type() has value number_type::floating_point_number * and we have a binary64 number. * You can recover the value by calling number.get_double() and you @@ -27760,7 +55682,6 @@ class value { */ simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; - /** * Get the raw JSON for this token. * @@ -27858,6 +55779,20 @@ class value { */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + + protected: /** * Create a value. @@ -27897,23 +55832,24 @@ class value { friend class object; friend struct simdjson_result; friend struct simdjson_result; + friend class field; }; } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public arm64::implementation_simdjson_result_base { +struct simdjson_result : public haswell::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(arm64::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(haswell::ondemand::value &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; - simdjson_inline simdjson_result get_array() noexcept; - simdjson_inline simdjson_result get_object() noexcept; + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; @@ -27925,7 +55861,7 @@ struct simdjson_result : public arm64::implementation_si template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; - simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result is_null() noexcept; @@ -27934,20 +55870,22 @@ struct simdjson_result : public arm64::implementation_si template simdjson_inline error_code get(T &out) noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_inline operator arm64::ondemand::array() noexcept(false); - simdjson_inline operator arm64::ondemand::object() noexcept(false); + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator haswell::ondemand::array() noexcept(false); + simdjson_inline operator haswell::ondemand::object() noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); - simdjson_inline operator arm64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator haswell::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; - simdjson_inline simdjson_result at(size_t index) noexcept; - simdjson_inline simdjson_result begin() & noexcept; - simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). @@ -27969,9 +55907,9 @@ struct simdjson_result : public arm64::implementation_si * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ - simdjson_inline simdjson_result find_field(std::string_view key) noexcept; - /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ - simdjson_inline simdjson_result find_field(const char *key) noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. @@ -27987,18 +55925,19 @@ struct simdjson_result : public arm64::implementation_si * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field as not there when they are not in order). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ - simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; - /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; - /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_inline simdjson_result operator[](std::string_view key) noexcept; - /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_result operator[](int) noexcept = delete; /** * Get the type of this JSON value. @@ -28007,12 +55946,13 @@ struct simdjson_result : public arm64::implementation_si * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). */ - simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; - simdjson_inline simdjson_result get_number_type() noexcept; - simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; @@ -28022,15 +55962,16 @@ struct simdjson_result : public arm64::implementation_si simdjson_inline simdjson_result current_location() noexcept; /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ simdjson_inline simdjson_result current_depth() const noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H -/* end file simdjson/generic/ondemand/value.h for arm64 */ -/* including simdjson/generic/ondemand/logger.h for arm64: #include "simdjson/generic/ondemand/logger.h" */ -/* begin file simdjson/generic/ondemand/logger.h for arm64 */ +/* end file simdjson/generic/ondemand/value.h for haswell */ +/* including simdjson/generic/ondemand/logger.h for haswell: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -28039,7 +55980,7 @@ struct simdjson_result : public arm64::implementation_si /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { // Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical @@ -28085,13 +56026,13 @@ static inline void log_error(const value_iterator &iter, const char *error, cons } // namespace logger } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H -/* end file simdjson/generic/ondemand/logger.h for arm64 */ -/* including simdjson/generic/ondemand/token_iterator.h for arm64: #include "simdjson/generic/ondemand/token_iterator.h" */ -/* begin file simdjson/generic/ondemand/token_iterator.h for arm64 */ +/* end file simdjson/generic/ondemand/logger.h for haswell */ +/* including simdjson/generic/ondemand/token_iterator.h for haswell: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -28102,7 +56043,7 @@ static inline void log_error(const value_iterator &iter, const char *error, cons /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { /** @@ -28141,7 +56082,7 @@ class token_iterator { * 1 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** @@ -28171,7 +56112,14 @@ class token_iterator { * @param position The position of the token. */ simdjson_inline uint32_t peek_length(token_position position) const noexcept; - + /** + * Get the maximum length of the JSON text for a root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token (start of the document). + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; /** * Return the current index. */ @@ -28226,15 +56174,15 @@ class token_iterator { }; } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public arm64::implementation_simdjson_result_base { +struct simdjson_result : public haswell::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(arm64::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(haswell::ondemand::token_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private @@ -28243,9 +56191,9 @@ struct simdjson_result : public arm64::implemen } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H -/* end file simdjson/generic/ondemand/token_iterator.h for arm64 */ -/* including simdjson/generic/ondemand/json_iterator.h for arm64: #include "simdjson/generic/ondemand/json_iterator.h" */ -/* begin file simdjson/generic/ondemand/json_iterator.h for arm64 */ +/* end file simdjson/generic/ondemand/token_iterator.h for haswell */ +/* including simdjson/generic/ondemand/json_iterator.h for haswell: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -28256,7 +56204,7 @@ struct simdjson_result : public arm64::implemen /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { /** @@ -28394,7 +56342,7 @@ class json_iterator { * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used ... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** @@ -28422,7 +56370,7 @@ class json_iterator { * @param position The position of the token to retrieve. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used ... */ simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** @@ -28433,13 +56381,21 @@ class json_iterator { * @param position The position of the token to retrieve. */ simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; /** * Get the JSON text for the last token in the document. * * This is not null-terminated; it is a view into the JSON. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used ... */ simdjson_inline const uint8_t *peek_last() const noexcept; @@ -28533,6 +56489,9 @@ class json_iterator { inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. @@ -28548,6 +56507,7 @@ class json_iterator { friend class raw_json_string; friend class parser; friend class value_iterator; + friend class field; template friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; template @@ -28555,15 +56515,15 @@ class json_iterator { }; // json_iterator } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public arm64::implementation_simdjson_result_base { +struct simdjson_result : public haswell::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(arm64::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(haswell::ondemand::json_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -28572,9 +56532,9 @@ struct simdjson_result : public arm64::implement } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H -/* end file simdjson/generic/ondemand/json_iterator.h for arm64 */ -/* including simdjson/generic/ondemand/json_type.h for arm64: #include "simdjson/generic/ondemand/json_type.h" */ -/* begin file simdjson/generic/ondemand/json_type.h for arm64 */ +/* end file simdjson/generic/ondemand/json_iterator.h for haswell */ +/* including simdjson/generic/ondemand/json_type.h for haswell: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -28585,7 +56545,7 @@ struct simdjson_result : public arm64::implement /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { /** @@ -28718,15 +56678,15 @@ inline std::ostream& operator<<(std::ostream& out, simdjson_result &t #endif } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public arm64::implementation_simdjson_result_base { +struct simdjson_result : public haswell::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(arm64::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(haswell::ondemand::json_type &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private @@ -28735,9 +56695,9 @@ struct simdjson_result : public arm64::implementatio } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H -/* end file simdjson/generic/ondemand/json_type.h for arm64 */ -/* including simdjson/generic/ondemand/raw_json_string.h for arm64: #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* begin file simdjson/generic/ondemand/raw_json_string.h for arm64 */ +/* end file simdjson/generic/ondemand/json_type.h for haswell */ +/* including simdjson/generic/ondemand/raw_json_string.h for haswell: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -28747,7 +56707,7 @@ struct simdjson_result : public arm64::implementatio /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { /** @@ -28923,30 +56883,30 @@ simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_js } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public arm64::implementation_simdjson_result_base { +struct simdjson_result : public haswell::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(arm64::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(haswell::ondemand::raw_json_string &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; - simdjson_inline simdjson_warn_unused simdjson_result unescape(arm64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; - simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(arm64::ondemand::json_iterator &iter) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(haswell::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(haswell::ondemand::json_iterator &iter) const noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H -/* end file simdjson/generic/ondemand/raw_json_string.h for arm64 */ -/* including simdjson/generic/ondemand/parser.h for arm64: #include "simdjson/generic/ondemand/parser.h" */ -/* begin file simdjson/generic/ondemand/parser.h for arm64 */ +/* end file simdjson/generic/ondemand/raw_json_string.h for haswell */ +/* including simdjson/generic/ondemand/parser.h for haswell: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -28958,12 +56918,12 @@ struct simdjson_result : public arm64::impleme #include namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -29033,6 +56993,22 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * + * ### std::string references + * + * If you pass a mutable std::string reference (std::string&), the parser will seek to extend + * its capacity to SIMDJSON_PADDING bytes beyond the end of the string. + * + * Whenever you pass an std::string reference, the parser will access the bytes beyond the end of + * the string but before the end of the allocated memory (std::string::capacity()). + * If you are using a sanitizer that checks for reading uninitialized bytes or std::string's + * container-overflow checks, you may encounter sanitizer warnings. + * You can safely ignore these warnings. Or you can call simdjson::pad(std::string&) to pad the + * string with SIMDJSON_PADDING spaces: this function returns a simdjson::padding_string_view + * which can be be passed to the parser's iterate function: + * + * std::string json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"; + * document doc = parser.iterate(simdjson::pad(json)); + * * @param json The JSON to parse. * @param len The length of the JSON. * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). @@ -29047,6 +57023,9 @@ class parser { * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ @@ -29191,9 +57170,9 @@ class parser { simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). */ - simdjson_inline size_t capacity() const noexcept; + simdjson_pure simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ - simdjson_inline size_t max_capacity() const noexcept; + simdjson_pure simdjson_inline size_t max_capacity() const noexcept; simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). @@ -29201,7 +57180,7 @@ class parser { * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. */ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -29224,8 +57203,12 @@ class parser { * behavior of the parser for future operations. */ bool threaded{true}; + #else + /** + * When SIMDJSON_THREADS_ENABLED is not defined, the parser instance cannot use threads. + */ + bool threaded{false}; #endif - /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. * The result must be valid UTF-8. @@ -29273,9 +57256,20 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * the string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and usafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ - std::unique_ptr implementation{}; + std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; @@ -29289,15 +57283,15 @@ class parser { }; } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public arm64::implementation_simdjson_result_base { +struct simdjson_result : public haswell::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(arm64::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(haswell::ondemand::parser &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; @@ -29305,11 +57299,11 @@ struct simdjson_result : public arm64::implementation_s } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H -/* end file simdjson/generic/ondemand/parser.h for arm64 */ +/* end file simdjson/generic/ondemand/parser.h for haswell */ // All other declarations -/* including simdjson/generic/ondemand/array.h for arm64: #include "simdjson/generic/ondemand/array.h" */ -/* begin file simdjson/generic/ondemand/array.h for arm64 */ +/* including simdjson/generic/ondemand/array.h for haswell: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -29320,7 +57314,7 @@ struct simdjson_result : public arm64::implementation_s /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { /** @@ -29356,7 +57350,8 @@ class array { * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer - * safe to continue. + * safe to continue. Note that count_elements() does not validate the JSON values, + * only the structure of the array. * * To check that an array is empty, it is more performant to use * the is_empty() method. @@ -29412,6 +57407,22 @@ class array { * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. @@ -29483,25 +57494,26 @@ class array { }; } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public arm64::implementation_simdjson_result_base { +struct simdjson_result : public haswell::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(arm64::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(haswell::ondemand::array &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; - simdjson_inline simdjson_result begin() noexcept; - simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; inline simdjson_result count_elements() & noexcept; inline simdjson_result is_empty() & noexcept; inline simdjson_result reset() & noexcept; - simdjson_inline simdjson_result at(size_t index) noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; }; @@ -29509,9 +57521,9 @@ struct simdjson_result : public arm64::implementation_si } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H -/* end file simdjson/generic/ondemand/array.h for arm64 */ -/* including simdjson/generic/ondemand/array_iterator.h for arm64: #include "simdjson/generic/ondemand/array_iterator.h" */ -/* begin file simdjson/generic/ondemand/array_iterator.h for arm64 */ +/* end file simdjson/generic/ondemand/array.h for haswell */ +/* including simdjson/generic/ondemand/array_iterator.h for haswell: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -29523,7 +57535,7 @@ struct simdjson_result : public arm64::implementation_si namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { /** @@ -29583,15 +57595,15 @@ class array_iterator { }; } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public arm64::implementation_simdjson_result_base { +struct simdjson_result : public haswell::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(arm64::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(haswell::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -29599,28 +57611,31 @@ struct simdjson_result : public arm64::implemen // Iterator interface // - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. - simdjson_inline bool operator==(const simdjson_result &) const noexcept; - simdjson_inline bool operator!=(const simdjson_result &) const noexcept; - simdjson_inline simdjson_result &operator++() noexcept; + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H -/* end file simdjson/generic/ondemand/array_iterator.h for arm64 */ -/* including simdjson/generic/ondemand/document.h for arm64: #include "simdjson/generic/ondemand/document.h" */ -/* begin file simdjson/generic/ondemand/document.h for arm64 */ +/* end file simdjson/generic/ondemand/array_iterator.h for haswell */ +/* including simdjson/generic/ondemand/document.h for haswell: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { /** @@ -29791,22 +57806,39 @@ class document { * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ - template simdjson_inline simdjson_result get() & noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); + template + simdjson_inline simdjson_result get() & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "Cannot initialize the specified type."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; } - /** @overload template simdjson_result get() & noexcept */ - template simdjson_inline simdjson_result get() && noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); + /** + * @overload template simdjson_result get() & noexcept + * + * We disallow the use tag_invoke CPO on a moved document; it may create UB + * if user uses `ondemand::array` or `ondemand::object` in their custom type. + * + * The member function is still remains specialize-able for compatibility + * reasons, but we completely disallow its use when a tag_invoke customization + * is provided. + */ + template + simdjson_inline simdjson_result get() && +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(!std::is_same::value && !std::is_same::value, "You should never hold either an ondemand::array or ondemand::object without a corresponding ondemand::document being alive; that would be Undefined Behaviour."); + return static_cast(*this).get(); } /** @@ -29820,11 +57852,57 @@ class document { * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ - template simdjson_inline error_code get(T &out) & noexcept; + template + simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { +#if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + } + /** @overload template error_code get(T &out) & noexcept */ - template simdjson_inline error_code get(T &out) && noexcept; + template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.) + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() & noexcept(false); + template + explicit simdjson_deprecated simdjson_inline operator T() && noexcept(false); + /** * Cast this JSON value to an array. * @@ -29908,7 +57986,8 @@ class document { * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer - * safe to continue. + * safe to continue. Note that count_elements() does not validate the JSON values, + * only the structure of the array. */ simdjson_inline simdjson_result count_elements() & noexcept; /** @@ -29997,7 +58076,7 @@ class document { * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field was not there when they are not in order). * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. E.g., the array @@ -30020,6 +58099,7 @@ class document { simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; /** * Get the type of this JSON value. It does not validate or consume the value. @@ -30043,6 +58123,14 @@ class document { */ simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the document is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + /** * Checks whether the document is a negative number. * @@ -30070,9 +58158,11 @@ class document { * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have - * an integer greater or equal to 9223372036854775808 + * an integer greater or equal to 9223372036854775808 and no larger than 18446744073709551615. * get_number_type() is number_type::signed_integer if we have an - * integer that is less than 9223372036854775808 + * integer that is less than 9223372036854775808 and greater or equal to -9223372036854775808. + * get_number_type() is number_type::big_integer if we have an integer outside + * of those ranges (either larger than 18446744073709551615 or smaller than -9223372036854775808). * Otherwise, get_number_type() has value number_type::floating_point_number * * This function requires processing the number string, but it is expected @@ -30190,6 +58280,14 @@ class document { * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/\\u00E9") == 123 + * doc.at_pointer((const char*)u8"/\u00E9") returns an error (NO_SUCH_FIELD) + * * Note that at_pointer() automatically calls rewind between each call. Thus * all values, objects and arrays that you have created so far (including unescaped strings) * are invalidated. After calling at_pointer, you need to consume the result: string values @@ -30206,6 +58304,30 @@ class document { * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_path(".\\u00E9") == 123 + * doc.at_path((const char*)u8".\u00E9") returns an error (NO_SUCH_FIELD) + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing @@ -30246,6 +58368,11 @@ class document { /** * A document_reference is a thin wrapper around a document reference instance. + * The document_reference instances are used primarily/solely for streams of JSON + * documents. They differ from document instances when parsing a scalar value + * (a document that is not an array or an object). In the case of a document, + * we expect the document to be fully consumed. In the case of a document_reference, + * we allow trailing content. */ class document_reference { public: @@ -30271,10 +58398,82 @@ class document_reference { simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; + template + simdjson_inline simdjson_result get() & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "Cannot initialize the specified type."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + template + simdjson_inline simdjson_result get() && +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(!std::is_same::value && !std::is_same::value, "You should never hold either an ondemand::array or ondemand::object without a corresponding ondemand::document_reference being alive; that would be Undefined Behaviour."); + return static_cast(*this).get(); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { +#if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + } + + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; simdjson_inline operator document&() const noexcept; - #if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator array() & noexcept(false); simdjson_inline operator object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); @@ -30294,11 +58493,13 @@ class document_reference { simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; @@ -30308,25 +58509,27 @@ class document_reference { simdjson_inline simdjson_result get_number() noexcept; simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + private: document *doc{nullptr}; }; } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public arm64::implementation_simdjson_result_base { +struct simdjson_result : public haswell::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(arm64::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(haswell::ondemand::document &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; - simdjson_inline simdjson_result get_array() & noexcept; - simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; @@ -30337,52 +58540,56 @@ struct simdjson_result : public arm64::implementation template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; - simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; - simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & noexcept; - template simdjson_inline simdjson_result get() && noexcept; + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; - #if SIMDJSON_EXCEPTIONS - simdjson_inline operator arm64::ondemand::array() & noexcept(false); - simdjson_inline operator arm64::ondemand::object() & noexcept(false); + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator haswell::ondemand::array() & noexcept(false); + simdjson_inline operator haswell::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); - simdjson_inline operator arm64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator haswell::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); - simdjson_inline operator arm64::ondemand::value() noexcept(false); + simdjson_inline operator haswell::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; - simdjson_inline simdjson_result at(size_t index) & noexcept; - simdjson_inline simdjson_result begin() & noexcept; - simdjson_inline simdjson_result end() & noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field(const char *key) & noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_inline simdjson_result operator[](const char *key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; - simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; simdjson_inline bool at_end() const noexcept; simdjson_inline bool is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; - simdjson_inline simdjson_result get_number_type() noexcept; - simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; }; @@ -30393,14 +58600,14 @@ struct simdjson_result : public arm64::implementation namespace simdjson { template<> -struct simdjson_result : public arm64::implementation_simdjson_result_base { +struct simdjson_result : public haswell::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(arm64::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result(haswell::ondemand::document_reference value, error_code error) noexcept; simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; - simdjson_inline simdjson_result get_array() & noexcept; - simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; @@ -30411,54 +58618,64 @@ struct simdjson_result : public arm64::impl template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; - simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; - simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_inline operator arm64::ondemand::array() & noexcept(false); - simdjson_inline operator arm64::ondemand::object() & noexcept(false); + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator haswell::ondemand::array() & noexcept(false); + simdjson_inline operator haswell::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); - simdjson_inline operator arm64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator haswell::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); - simdjson_inline operator arm64::ondemand::value() noexcept(false); + simdjson_inline operator haswell::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; - simdjson_inline simdjson_result at(size_t index) & noexcept; - simdjson_inline simdjson_result begin() & noexcept; - simdjson_inline simdjson_result end() & noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field(const char *key) & noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_inline simdjson_result operator[](const char *key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; - simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; - simdjson_inline simdjson_result get_number_type() noexcept; - simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H -/* end file simdjson/generic/ondemand/document.h for arm64 */ -/* including simdjson/generic/ondemand/document_stream.h for arm64: #include "simdjson/generic/ondemand/document_stream.h" */ -/* begin file simdjson/generic/ondemand/document_stream.h for arm64 */ +/* end file simdjson/generic/ondemand/document.h for haswell */ +/* including simdjson/generic/ondemand/document_stream.h for haswell: #include "simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -30476,7 +58693,7 @@ struct simdjson_result : public arm64::impl #endif namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED @@ -30583,10 +58800,9 @@ class document_stream { class iterator { public: using value_type = simdjson_result; - using reference = value_type; - + using reference = simdjson_result; + using pointer = void; using difference_type = std::ptrdiff_t; - using iterator_category = std::input_iterator_tag; /** @@ -30596,7 +58812,7 @@ class document_stream { /** * Get the current document (or error). */ - simdjson_inline simdjson_result operator*() noexcept; + simdjson_inline reference operator*() noexcept; /** * Advance to the next document (prefix). */ @@ -30778,18 +58994,18 @@ class document_stream { friend class document; friend class json_iterator; friend struct simdjson_result; - friend struct internal::simdjson_result_base; + friend struct simdjson::internal::simdjson_result_base; }; // document_stream } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public arm64::implementation_simdjson_result_base { +struct simdjson_result : public haswell::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(arm64::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(haswell::ondemand::document_stream &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; @@ -30797,9 +59013,9 @@ struct simdjson_result : public arm64::impleme } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H -/* end file simdjson/generic/ondemand/document_stream.h for arm64 */ -/* including simdjson/generic/ondemand/field.h for arm64: #include "simdjson/generic/ondemand/field.h" */ -/* begin file simdjson/generic/ondemand/field.h for arm64 */ +/* end file simdjson/generic/ondemand/document_stream.h for haswell */ +/* including simdjson/generic/ondemand/field.h for haswell: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -30811,7 +59027,7 @@ struct simdjson_result : public arm64::impleme /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { /** @@ -30838,12 +59054,39 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; + /** + * Get the unprocessed key as a string_view. This includes the quotes and may include + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). + */ + simdjson_inline std::string_view key_raw_json_token() const noexcept; + /** + * Get the key as a string_view. This does not include the quotes and + * the string is unprocessed key so it may contain escape characters + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. + */ + simdjson_inline std::string_view escaped_key() const noexcept; /** * Get the field value. */ @@ -30862,29 +59105,33 @@ class field : public std::pair { }; } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public arm64::implementation_simdjson_result_base { +struct simdjson_result : public haswell::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(arm64::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(haswell::ondemand::field &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; - simdjson_inline simdjson_result key() noexcept; - simdjson_inline simdjson_result value() noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result key_raw_json_token() noexcept; + simdjson_inline simdjson_result escaped_key() noexcept; + simdjson_inline simdjson_result value() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H -/* end file simdjson/generic/ondemand/field.h for arm64 */ -/* including simdjson/generic/ondemand/object.h for arm64: #include "simdjson/generic/ondemand/object.h" */ -/* begin file simdjson/generic/ondemand/object.h for arm64 */ +/* end file simdjson/generic/ondemand/field.h for haswell */ +/* including simdjson/generic/ondemand/object.h for haswell: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -30895,7 +59142,7 @@ struct simdjson_result : public arm64::implementation_si /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { /** @@ -30943,6 +59190,8 @@ class object { * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() * is an error. * + * If you expect to have keys with escape characters, please review our documentation. + * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ @@ -30964,7 +59213,7 @@ class object { * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field was not there when they are not in order). * * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. @@ -30980,6 +59229,8 @@ class object { * You are expected to access keys only once. You should access the value corresponding to a key * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. * + * If you expect to have keys with escape characters, please review our documentation. + * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ @@ -31027,11 +59278,24 @@ class object { */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -31093,27 +59357,29 @@ class object { }; } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public arm64::implementation_simdjson_result_base { +struct simdjson_result : public haswell::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(arm64::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(haswell::ondemand::object &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; - simdjson_inline simdjson_result begin() noexcept; - simdjson_inline simdjson_result end() noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; @@ -31124,9 +59390,9 @@ struct simdjson_result : public arm64::implementation_s } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H -/* end file simdjson/generic/ondemand/object.h for arm64 */ -/* including simdjson/generic/ondemand/object_iterator.h for arm64: #include "simdjson/generic/ondemand/object_iterator.h" */ -/* begin file simdjson/generic/ondemand/object_iterator.h for arm64 */ +/* end file simdjson/generic/ondemand/object.h for haswell */ +/* including simdjson/generic/ondemand/object_iterator.h for haswell: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -31137,7 +59403,7 @@ struct simdjson_result : public arm64::implementation_s /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { class object_iterator { @@ -31178,15 +59444,15 @@ class object_iterator { }; } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public arm64::implementation_simdjson_result_base { +struct simdjson_result : public haswell::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(arm64::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(haswell::ondemand::object_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -31195,21 +59461,21 @@ struct simdjson_result : public arm64::impleme // // Reads key and value, yielding them to the user. - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. // Assumes it's being compared with the end. true if depth < iter->depth. - simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator==(const simdjson_result &) const noexcept; // Assumes it's being compared with the end. true if depth >= iter->depth. - simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; // Checks for ']' and ',' - simdjson_inline simdjson_result &operator++() noexcept; + simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H -/* end file simdjson/generic/ondemand/object_iterator.h for arm64 */ -/* including simdjson/generic/ondemand/serialization.h for arm64: #include "simdjson/generic/ondemand/serialization.h" */ -/* begin file simdjson/generic/ondemand/serialization.h for arm64 */ +/* end file simdjson/generic/ondemand/object_iterator.h for haswell */ +/* including simdjson/generic/ondemand/serialization.h for haswell: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -31223,30 +59489,30 @@ namespace simdjson { * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ -inline simdjson_result to_json_string(arm64::ondemand::document& x) noexcept; +inline simdjson_result to_json_string(haswell::ondemand::document& x) noexcept; /** * Create a string-view instance out of a value instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. The value must * not have been accessed previously. It does not * validate the content. */ -inline simdjson_result to_json_string(arm64::ondemand::value& x) noexcept; +inline simdjson_result to_json_string(haswell::ondemand::value& x) noexcept; /** * Create a string-view instance out of an object instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ -inline simdjson_result to_json_string(arm64::ondemand::object& x) noexcept; +inline simdjson_result to_json_string(haswell::ondemand::object& x) noexcept; /** * Create a string-view instance out of an array instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ -inline simdjson_result to_json_string(arm64::ondemand::array& x) noexcept; -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(haswell::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); } // namespace simdjson /** @@ -31256,7 +59522,7 @@ inline simdjson_result to_json_string(simdjson_result x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. It does not @@ -31278,9 +59544,9 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. It does not @@ -31290,13 +59556,13 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); #endif -inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document_reference& value); +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document_reference& value); #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); #endif /** * Print JSON to an output stream. It does not @@ -31306,22 +59572,237 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif -}}} // namespace simdjson::arm64::ondemand +}}} // namespace simdjson::haswell::ondemand #endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H -/* end file simdjson/generic/ondemand/serialization.h for arm64 */ +/* end file simdjson/generic/ondemand/serialization.h for haswell */ + +// Deserialization for standard types +/* including simdjson/generic/ondemand/std_deserialize.h for haswell: #include "simdjson/generic/ondemand/std_deserialize.h" */ +/* begin file simdjson/generic/ondemand/std_deserialize.h for haswell */ +#if SIMDJSON_SUPPORTS_DESERIALIZATION + +#ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_ONDEMAND_DESERIALIZE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +template +constexpr bool require_custom_serialization = false; + +////////////////////////////// +// Number deserialization +////////////////////////////// + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + using limits = std::numeric_limits; + + uint64_t x; + SIMDJSON_TRY(val.get_uint64().get(x)); + if (x > (limits::max)()) { + return NUMBER_OUT_OF_RANGE; + } + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + double x; + SIMDJSON_TRY(val.get_double().get(x)); + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + using limits = std::numeric_limits; + + int64_t x; + SIMDJSON_TRY(val.get_int64().get(x)); + if (x > (limits::max)() || x < (limits::min)()) { + return NUMBER_OUT_OF_RANGE; + } + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + +/** + * STL containers have several constructors including one that takes a single + * size argument. Thus, some compilers (Visual Studio) will not be able to + * disambiguate between the size and container constructor. Users should + * explicitly specify the type of the container as needed: e.g., + * doc.get>(). + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::value_type; + static_assert( + deserializable, + "The specified type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the container must default constructible."); + + haswell::ondemand::array arr; + SIMDJSON_TRY(val.get_array().get(arr)); + for (auto v : arr) { + if constexpr (concepts::returns_reference) { + if (auto const err = v.get().get(concepts::emplace_one(out)); + err) { + // If an error occurs, the empty element that we just inserted gets + // removed. We're not using a temp variable because if T is a heavy + // type, we want the valid path to be the fast path and the slow path be + // the path that has errors in it. + if constexpr (requires { out.pop_back(); }) { + static_cast(out.pop_back()); + } + return err; + } + } else { + value_type temp; + if (auto const err = v.get().get(temp); err) { + return err; + } + concepts::emplace_one(out, std::move(temp)); + } + } + return SUCCESS; +} + + +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + haswell::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + + + + +/** + * This CPO (Customization Point Object) will help deserialize into + * smart pointers. + * + * If constructing T is nothrow, this conversion should be nothrow as well since + * we return MEMALLOC if we're not able to allocate memory instead of throwing + * the error message. + * + * @tparam T The type inside the smart pointer + * @tparam ValT document/value type + * @param val document/value + * @param out a reference to the smart pointer + * @return status of the conversion + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::element_type, ValT>) { + using element_type = typename std::remove_cvref_t::element_type; + + // For better error messages, don't use these as constraints on + // the tag_invoke CPO. + static_assert( + deserializable, + "The specified type inside the unique_ptr must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the unique_ptr must default constructible."); + + auto ptr = new (std::nothrow) element_type(); + if (ptr == nullptr) { + return MEMALLOC; + } + SIMDJSON_TRY(val.template get(*ptr)); + out.reset(ptr); + return SUCCESS; +} + +/** + * This CPO (Customization Point Object) will help deserialize into optional types. + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { + using value_type = typename std::remove_cvref_t::value_type; + + static_assert( + deserializable, + "The specified type inside the unique_ptr must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the unique_ptr must default constructible."); + + if (!out) { + out.emplace(); + } + SIMDJSON_TRY(val.template get(out.value())); + return SUCCESS; +} + +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_DESERIALIZE_H +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +/* end file simdjson/generic/ondemand/std_deserialize.h for haswell */ // Inline definitions -/* including simdjson/generic/ondemand/array-inl.h for arm64: #include "simdjson/generic/ondemand/array-inl.h" */ -/* begin file simdjson/generic/ondemand/array-inl.h for arm64 */ +/* including simdjson/generic/ondemand/array-inl.h for haswell: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/jsonpathutil.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ @@ -31331,7 +59812,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result array::start(value_iterator &iter) noexcept { - // We don't need to know if the array is empty to start iteration, but we do want to know if there - // is an error--thus `simdjson_unused`. - simdjson_unused bool has_value; - SIMDJSON_TRY( iter.start_array().get(has_value) ); - return array(iter); +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. + iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline simdjson_result array::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for haswell */ +/* including simdjson/generic/ondemand/array_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. + if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::array_iterator &&value +) noexcept + : haswell::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : haswell::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for haswell */ +/* including simdjson/generic/ondemand/value-inl.h for haswell: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result value::get_object() noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +template +simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { + return iter.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} + +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + + +template<> simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } +template<> simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } +template<> simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } +template<> simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } +template<> simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } +template<> simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } +template<> simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } +template<> simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } +template<> simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline value::operator T() noexcept(false) { + return get(); +} +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); } -simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { - simdjson_unused bool has_value; - SIMDJSON_TRY( iter.start_root_array().get(has_value) ); - return array(iter); +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); } -simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { - bool has_value; - SIMDJSON_TRY(iter.started_array().get(has_value)); - return array(iter); + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); } -simdjson_inline simdjson_result array::begin() noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - return array_iterator(iter); +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; } -simdjson_inline simdjson_result array::end() noexcept { - return array_iterator(iter); +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; } -simdjson_inline error_code array::consume() noexcept { - auto error = iter.json_iter().skip_child(iter.depth()-1); - if(error) { iter.abandon(); } - return error; + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); } -simdjson_inline simdjson_result array::raw_json() noexcept { - const uint8_t * starting_point{iter.peek_start()}; - auto error = consume(); +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); if(error) { return error; } - // After 'consume()', we could be left pointing just beyond the document, but that - // is ok because we are not going to dereference the final pointer position, we just - // use it to compute the length in bytes. - const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); + return ! ((this_type == json_type::array) || (this_type == json_type::object)); } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline simdjson_result array::count_elements() & noexcept { - size_t count{0}; - // Important: we do not consume any of the values. - for(simdjson_unused auto v : *this) { count++; } - // The above loop will always succeed, but we want to report errors. - if(iter.error()) { return iter.error(); } - // We need to move back at the start because we expect users to iterate through - // the array after counting the number of elements. - iter.reset_array(); - return count; +simdjson_inline simdjson_result value::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); } -SIMDJSON_POP_DISABLE_WARNINGS -simdjson_inline simdjson_result array::is_empty() & noexcept { - bool is_not_empty; - auto error = iter.reset_array().get(is_not_empty); - if(error) { return error; } - return !is_not_empty; + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); } -inline simdjson_result array::reset() & noexcept { - return iter.reset_array(); +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); } -inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { - if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } - json_pointer = json_pointer.substr(1); - // - means "the append position" or "the element after the end of the array" - // We don't support this, because we're returning a real element, not a position. - if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} - // Read the array index - size_t array_index = 0; - size_t i; - for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { - uint8_t digit = uint8_t(json_pointer[i] - '0'); - // Check for non-digit in array index. If it's there, we're trying to get a field in an object - if (digit > 9) { return INCORRECT_TYPE; } - array_index = array_index*10 + digit; +simdjson_inline simdjson_result value::raw_json() noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: { + ondemand::array array; + SIMDJSON_TRY(get_array().get(array)); + return array.raw_json(); + } + case json_type::object: { + ondemand::object object; + SIMDJSON_TRY(get_object().get(object)); + return object.raw_json(); + } + default: + return raw_json_token(); } +} - // 0 followed by other digits is invalid - if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} - // Empty string is invalid; so is a "/" with no digits before it - if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" - // Get the child - auto child = at(array_index); - // If there is an error, it ends here - if(child.error()) { - return child; +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + return false; } + return true; +} - // If there is a /, we're not done yet, call recursively. - if (i < json_pointer.length()) { - child = child.at_pointer(json_pointer.substr(i)); +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } + return INVALID_JSON_POINTER; } - return child; } -simdjson_inline simdjson_result array::at(size_t index) noexcept { - size_t i = 0; - for (auto value : *this) { - if (i == index) { return value; } - i++; +simdjson_inline simdjson_result value::at_path(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; } - return INDEX_OUT_OF_BOUNDS; } } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - arm64::ondemand::array &&value -) noexcept - : implementation_simdjson_result_base( - std::forward(value) +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) ) { } -simdjson_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error -) noexcept - : implementation_simdjson_result_base(error) +) noexcept : + implementation_simdjson_result_base(error) { } - -simdjson_inline simdjson_result simdjson_result::begin() noexcept { +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { if (error()) { return error(); } - return first.end(); + return {}; } -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { if (error()) { return error(); } - return first.count_elements(); + return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { if (error()) { return error(); } - return first.is_empty(); + return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { if (error()) { return error(); } - return first.at(index); + return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { if (error()) { return error(); } - return first.at_pointer(json_pointer); + return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { if (error()) { return error(); } - return first.raw_json(); + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; } -} // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H -/* end file simdjson/generic/ondemand/array-inl.h for arm64 */ -/* including simdjson/generic/ondemand/array_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/array_iterator-inl.h for arm64 */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +template<> simdjson_inline error_code simdjson_result::get(haswell::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} -namespace simdjson { -namespace arm64 { -namespace ondemand { +template simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} -simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept - : iter{_iter} -{} +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} -simdjson_inline simdjson_result array_iterator::operator*() noexcept { - if (iter.error()) { iter.abandon(); return iter.error(); } - return value(iter.child()); +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); } -simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { - return !(*this != other); +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); } -simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { - return iter.is_open(); +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); } -simdjson_inline array_iterator &array_iterator::operator++() noexcept { - error_code error; - // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. - // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. - if (( error = iter.error() )) { return *this; } - if (( error = iter.skip_child() )) { return *this; } - if (( error = iter.has_next_element().error() )) { return *this; } - return *this; +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); } +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator haswell::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif -} // namespace ondemand -} // namespace arm64 -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result( - arm64::ondemand::array_iterator &&value -) noexcept - : arm64::implementation_simdjson_result_base(std::forward(value)) -{ - first.iter.assert_is_valid(); +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); } -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : arm64::implementation_simdjson_result_base({}, error) -{ + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); } -simdjson_inline simdjson_result simdjson_result::operator*() noexcept { +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } - return *first; + return first.current_location(); } -simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return !error(); } - return first == other.first; + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); } -simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return error(); } - return first != other.first; + +simdjson_inline simdjson_result simdjson_result::at_pointer( + std::string_view json_pointer) noexcept { + if (error()) { + return error(); + } + return first.at_pointer(json_pointer); } -simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { - // Clear the error if there is one, so we don't yield it twice - if (error()) { second = SUCCESS; return *this; } - ++(first); - return *this; + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); } } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/array_iterator-inl.h for arm64 */ -/* including simdjson/generic/ondemand/document-inl.h for arm64: #include "simdjson/generic/ondemand/document-inl.h" */ -/* begin file simdjson/generic/ondemand/document-inl.h for arm64 */ +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for haswell */ +/* including simdjson/generic/ondemand/document-inl.h for haswell: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept @@ -31795,22 +60842,29 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } - -template simdjson_inline error_code document::get(T &out) & noexcept { - return get().get(out); -} -template simdjson_inline error_code document::get(T &out) && noexcept { - return std::forward(*this).get().get(out); -} +template<> simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } +template<> simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } +template<> simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } +template<> simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } +template<> simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } +template<> simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } +template<> simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } +template<> simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } +template<> simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } + +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } #if SIMDJSON_EXCEPTIONS +template +simdjson_deprecated simdjson_inline document::operator T() && noexcept(false) { return get(); } +template +simdjson_inline document::operator T() & noexcept(false) { return get(); } simdjson_inline document::operator array() & noexcept(false) { return get_array(); } simdjson_inline document::operator object() & noexcept(false) { return get_object(); } simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } @@ -31867,7 +60921,14 @@ simdjson_inline simdjson_result document::operator[](const char *key) & n } simdjson_inline error_code document::consume() noexcept { - auto error = iter.skip_child(0); + bool scalar = false; + auto error = is_scalar().get(scalar); + if(error) { return error; } + if(scalar) { + iter.return_current_and_advance(); + return SUCCESS; + } + error = iter.skip_child(0); if(error) { iter.abandon(); } return error; } @@ -31889,12 +60950,21 @@ simdjson_inline simdjson_result document::type() noexcept { } simdjson_inline simdjson_result document::is_scalar() noexcept { + // For more speed, we could do: + // return iter.is_single_token(); json_type this_type; auto error = type().get(this_type); if(error) { return error; } return ! ((this_type == json_type::array) || (this_type == json_type::object)); } +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + simdjson_inline bool document::is_negative() noexcept { return get_root_value_iterator().is_root_negative(); } @@ -31914,7 +60984,7 @@ simdjson_inline simdjson_result document::get_number() noexcept { simdjson_inline simdjson_result document::raw_json_token() noexcept { auto _iter = get_root_value_iterator(); - return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); } simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { @@ -31935,275 +61005,305 @@ simdjson_inline simdjson_result document::at_pointer(std::string_view jso } } +simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - arm64::ondemand::document &&value +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::document &&value ) noexcept : - implementation_simdjson_result_base( - std::forward(value) + implementation_simdjson_result_base( + std::forward(value) ) { } -simdjson_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : - implementation_simdjson_result_base( + implementation_simdjson_result_base( error ) { } -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_inline error_code simdjson_result::rewind() noexcept { +simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { +simdjson_inline simdjson_result simdjson_result::get() & noexcept { if (error()) { return error(); } return first.get(); } template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } - return std::forward(first).get(); + return std::forward(first).get(); } template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { if (error()) { return error(); } return first.get(out); } template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { if (error()) { return error(); } - return std::forward(first).get(out); + return std::forward(first).get(out); } -template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } - return std::forward(first); + return std::forward(first); } -template<> simdjson_inline error_code simdjson_result::get(arm64::ondemand::document &out) & noexcept = delete; -template<> simdjson_inline error_code simdjson_result::get(arm64::ondemand::document &out) && noexcept { +template<> simdjson_inline error_code simdjson_result::get(haswell::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code simdjson_result::get(haswell::ondemand::document &out) && noexcept { if (error()) { return error(); } - out = std::forward(first); + out = std::forward(first); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::type() noexcept { +simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} -simdjson_inline bool simdjson_result::is_negative() noexcept { +simdjson_inline bool simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS -simdjson_inline simdjson_result::operator arm64::ondemand::array() & noexcept(false) { +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator arm64::ondemand::object() & noexcept(false) { +simdjson_inline simdjson_result::operator haswell::ondemand::array() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { +simdjson_inline simdjson_result::operator haswell::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator double() noexcept(false) { +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { +simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator arm64::ondemand::raw_json_string() noexcept(false) { +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator bool() noexcept(false) { +simdjson_inline simdjson_result::operator haswell::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator arm64::ondemand::value() noexcept(false) { +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } -simdjson_inline bool simdjson_result::at_end() const noexcept { +simdjson_inline bool simdjson_result::at_end() const noexcept { if (error()) { return error(); } return first.at_end(); } -simdjson_inline int32_t simdjson_result::current_depth() const noexcept { +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { if (error()) { return error(); } return first.current_depth(); } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} } // namespace simdjson namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} @@ -32239,15 +61339,25 @@ simdjson_inline simdjson_result document_reference::get_raw_jso simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } - +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } #if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } -simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } #endif @@ -32264,6 +61374,7 @@ simdjson_inline simdjson_result document_reference::find_field_unordered( simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } @@ -32272,214 +61383,264 @@ simdjson_inline simdjson_result document_reference::get_number_type simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} simdjson_inline document_reference::operator document&() const noexcept { return *doc; } } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::document_reference value, error_code error) - noexcept : implementation_simdjson_result_base(std::forward(value), error) {} +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_inline error_code simdjson_result::rewind() noexcept { +simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } -simdjson_inline simdjson_result simdjson_result::type() noexcept { +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } -simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +template <> +simdjson_inline error_code simdjson_result::get(haswell::ondemand::document_reference &out) & noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +template <> +simdjson_inline error_code simdjson_result::get(haswell::ondemand::document_reference &out) && noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS -simdjson_inline simdjson_result::operator arm64::ondemand::array() & noexcept(false) { +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator haswell::ondemand::array() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator arm64::ondemand::object() & noexcept(false) { +simdjson_inline simdjson_result::operator haswell::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator double() noexcept(false) { +simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator arm64::ondemand::raw_json_string() noexcept(false) { +simdjson_inline simdjson_result::operator haswell::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator bool() noexcept(false) { +simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator arm64::ondemand::value() noexcept(false) { +simdjson_inline simdjson_result::operator haswell::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H -/* end file simdjson/generic/ondemand/document-inl.h for arm64 */ -/* including simdjson/generic/ondemand/document_stream-inl.h for arm64: #include "simdjson/generic/ondemand/document_stream-inl.h" */ -/* begin file simdjson/generic/ondemand/document_stream-inl.h for arm64 */ +/* end file simdjson/generic/ondemand/document-inl.h for haswell */ +/* including simdjson/generic/ondemand/document_stream-inl.h for haswell: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -32494,7 +61655,7 @@ simdjson_inline simdjson_result simdjson_result namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED @@ -32634,7 +61795,6 @@ simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bo } simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { - //if(stream->error) { return stream->error; } return simdjson_result(stream->doc, stream->error); } @@ -32824,9 +61984,18 @@ simdjson_inline std::string_view document_stream::iterator::source() const noexc depth--; break; default: // Scalar value document - // TODO: Remove any trailing whitespaces + // TODO: We could remove trailing whitespaces // This returns a string spanning from start of value to the beginning of the next document (excluded) - return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } } cur_struct_index++; } @@ -32881,22 +62050,22 @@ inline void document_stream::start_stage1_thread() noexcept { #endif // SIMDJSON_THREADS_ENABLED } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : - implementation_simdjson_result_base(error) + implementation_simdjson_result_base(error) { } -simdjson_inline simdjson_result::simdjson_result( - arm64::ondemand::document_stream &&value +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::document_stream &&value ) noexcept : - implementation_simdjson_result_base( - std::forward(value) + implementation_simdjson_result_base( + std::forward(value) ) { } @@ -32904,9 +62073,9 @@ simdjson_inline simdjson_result::simdjson_resu } #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H -/* end file simdjson/generic/ondemand/document_stream-inl.h for arm64 */ -/* including simdjson/generic/ondemand/field-inl.h for arm64: #include "simdjson/generic/ondemand/field-inl.h" */ -/* begin file simdjson/generic/ondemand/field-inl.h for arm64 */ +/* end file simdjson/generic/ondemand/document_stream-inl.h for haswell */ +/* including simdjson/generic/ondemand/field-inl.h for haswell: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -32918,10 +62087,10 @@ simdjson_inline simdjson_result::simdjson_resu /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { -// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit +// clang 6 does not think the default constructor can be noexcept, so we make it explicit simdjson_inline field::field() noexcept : std::pair() {} simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept @@ -32947,11 +62116,32 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. return first; } + +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +} + +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); +} + simdjson_inline value &field::value() & noexcept { return second; } @@ -32961,35 +62151,53 @@ simdjson_inline value field::value() && noexcept { } } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - arm64::ondemand::field &&value +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::field &&value ) noexcept : - implementation_simdjson_result_base( - std::forward(value) + implementation_simdjson_result_base( + std::forward(value) ) { } -simdjson_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : - implementation_simdjson_result_base(error) + implementation_simdjson_result_base(error) { } -simdjson_inline simdjson_result simdjson_result::key() noexcept { +simdjson_inline simdjson_result simdjson_result::key() noexcept { if (error()) { return error(); } return first.key(); } -simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { + if (error()) { return error(); } + return first.key_raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { + if (error()) { return error(); } + return first.escaped_key(); +} + +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { if (error()) { return error(); } return first.unescaped_key(allow_replacement); } -simdjson_inline simdjson_result simdjson_result::value() noexcept { + +template +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + +simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); } @@ -32997,9 +62205,9 @@ simdjson_inline simdjson_result simdjson_result simdjson_resultimplementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + inline void json_iterator::rewind() noexcept { token.set_position( root_position() ); logger::log_headers(); // We start again @@ -33288,6 +62513,12 @@ simdjson_inline uint32_t json_iterator::peek_length(token_position position) con #endif // SIMDJSON_CHECK_EOF return token.peek_length(position); } +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); +} simdjson_inline token_position json_iterator::last_position() const noexcept { // The following line fails under some compilers... @@ -33333,11 +62564,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -33396,22 +62639,22 @@ simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const ui } } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::json_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/json_iterator-inl.h for arm64 */ -/* including simdjson/generic/ondemand/json_type-inl.h for arm64: #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* begin file simdjson/generic/ondemand/json_type-inl.h for arm64 */ +/* end file simdjson/generic/ondemand/json_iterator-inl.h for haswell */ +/* including simdjson/generic/ondemand/json_type-inl.h for haswell: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -33422,7 +62665,7 @@ simdjson_inline simdjson_result::simdjson_result /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { @@ -33462,7 +62705,6 @@ simdjson_inline number::operator uint64_t() const noexcept { return get_uint64(); } - simdjson_inline bool number::is_int64() const noexcept { return get_number_type() == number_type::signed_integer; } @@ -33517,22 +62759,22 @@ simdjson_inline void number::skip_double() noexcept { } } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::json_type &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H -/* end file simdjson/generic/ondemand/json_type-inl.h for arm64 */ -/* including simdjson/generic/ondemand/logger-inl.h for arm64: #include "simdjson/generic/ondemand/logger-inl.h" */ -/* begin file simdjson/generic/ondemand/logger-inl.h for arm64 */ +/* end file simdjson/generic/ondemand/json_type-inl.h for haswell */ +/* including simdjson/generic/ondemand/logger-inl.h for haswell: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -33547,7 +62789,7 @@ simdjson_inline simdjson_result::simdjson_result(err #include namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { namespace logger { @@ -33754,13 +62996,13 @@ inline void log_line(const json_iterator &iter, token_position index, depth_t de } // namespace logger } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H -/* end file simdjson/generic/ondemand/logger-inl.h for arm64 */ -/* including simdjson/generic/ondemand/object-inl.h for arm64: #include "simdjson/generic/ondemand/object-inl.h" */ -/* begin file simdjson/generic/ondemand/object-inl.h for arm64 */ +/* end file simdjson/generic/ondemand/logger-inl.h for haswell */ +/* including simdjson/generic/ondemand/object-inl.h for haswell: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -33775,7 +63017,7 @@ inline void log_line(const json_iterator &iter, token_position index, depth_t de /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { @@ -33927,6 +63169,14 @@ inline simdjson_result object::at_pointer(std::string_view json_pointer) return child; } +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); +} + simdjson_inline simdjson_result object::count_fields() & noexcept { size_t count{0}; // Important: we do not consume any of the values. @@ -33951,79 +63201,87 @@ simdjson_inline simdjson_result object::reset() & noexcept { } } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::object &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -simdjson_inline simdjson_result simdjson_result::begin() noexcept { +simdjson_inline simdjson_result simdjson_result::begin() noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() noexcept { +simdjson_inline simdjson_result simdjson_result::end() noexcept { if (error()) { return error(); } return first.end(); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { if (error()) { return error(); } - return std::forward(first).find_field_unordered(key); + return std::forward(first).find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { if (error()) { return error(); } - return std::forward(first)[key]; + return std::forward(first)[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { if (error()) { return error(); } - return std::forward(first).find_field(key); + return std::forward(first).find_field(key); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } -inline simdjson_result simdjson_result::reset() noexcept { +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +inline simdjson_result simdjson_result::reset() noexcept { if (error()) { return error(); } return first.reset(); } -inline simdjson_result simdjson_result::is_empty() noexcept { +inline simdjson_result simdjson_result::is_empty() noexcept { if (error()) { return error(); } return first.is_empty(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H -/* end file simdjson/generic/ondemand/object-inl.h for arm64 */ -/* including simdjson/generic/ondemand/object_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/object_iterator-inl.h for arm64 */ +/* end file simdjson/generic/ondemand/object-inl.h for haswell */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -34035,7 +63293,7 @@ simdjson_inline simdjson_result simdjson_result::simdjson_result( - arm64::ondemand::object_iterator &&value +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::object_iterator &&value ) noexcept - : implementation_simdjson_result_base(std::forward(value)) + : implementation_simdjson_result_base(std::forward(value)) { first.iter.assert_is_valid(); } -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base({}, error) +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) { } -simdjson_inline simdjson_result simdjson_result::operator*() noexcept { +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { if (error()) { return error(); } return *first; } // If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return !error(); } return first == other.first; } // If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return error(); } return first != other.first; } // Checks for ']' and ',' -simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { // Clear the error if there is one, so we don't yield it twice if (error()) { second = SUCCESS; return *this; } ++first; @@ -34162,9 +63420,9 @@ simdjson_inline simdjson_result &simdjson_resu } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/object_iterator-inl.h for arm64 */ -/* including simdjson/generic/ondemand/parser-inl.h for arm64: #include "simdjson/generic/ondemand/parser-inl.h" */ -/* begin file simdjson/generic/ondemand/parser-inl.h for arm64 */ +/* end file simdjson/generic/ondemand/object_iterator-inl.h for haswell */ +/* including simdjson/generic/ondemand/parser-inl.h for haswell: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -34181,7 +63439,7 @@ simdjson_inline simdjson_result &simdjson_resu /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { simdjson_inline parser::parser(size_t max_capacity) noexcept @@ -34209,6 +63467,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } @@ -34225,6 +63488,27 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p return document::start({ reinterpret_cast(json.data()), this }); } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } @@ -34296,13 +63580,13 @@ inline simdjson_result parser::iterate_many(const padded_string return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } -simdjson_inline size_t parser::capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } -simdjson_inline size_t parser::max_capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } -simdjson_inline size_t parser::max_depth() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { return _max_depth; } @@ -34331,22 +63615,22 @@ simdjson_inline simdjson_warn_unused simdjson_result parser::u } } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::parser &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H -/* end file simdjson/generic/ondemand/parser-inl.h for arm64 */ -/* including simdjson/generic/ondemand/raw_json_string-inl.h for arm64: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ -/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for arm64 */ +/* end file simdjson/generic/ondemand/parser-inl.h for haswell */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for haswell: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -34359,7 +63643,7 @@ simdjson_inline simdjson_result::simdjson_result(error_ namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} @@ -34369,36 +63653,39 @@ simdjson_inline const char * raw_json_string::raw() const noexcept { return rein simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;pos < target.size() && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;pos < target.size();pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;target[pos] && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;target[pos];pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } @@ -34410,7 +63697,7 @@ simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string } simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and + // Assumptions: does not contain unescaped quote characters("), and // the raw content is quote terminated within a valid JSON string. if(target.size() <= SIMDJSON_PADDING) { return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); @@ -34525,34 +63812,34 @@ simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, cons } } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::raw_json_string &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -simdjson_inline simdjson_result simdjson_result::raw() const noexcept { +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { if (error()) { return error(); } return first.raw(); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(arm64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(haswell::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { if (error()) { return error(); } return first.unescape(iter, allow_replacement); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(arm64::ondemand::json_iterator &iter) const noexcept { +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(haswell::ondemand::json_iterator &iter) const noexcept { if (error()) { return error(); } return first.unescape_wobbly(iter); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H -/* end file simdjson/generic/ondemand/raw_json_string-inl.h for arm64 */ -/* including simdjson/generic/ondemand/serialization-inl.h for arm64: #include "simdjson/generic/ondemand/serialization-inl.h" */ -/* begin file simdjson/generic/ondemand/serialization-inl.h for arm64 */ +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for haswell */ +/* including simdjson/generic/ondemand/serialization-inl.h for haswell: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -34579,43 +63866,43 @@ inline std::string_view trim(const std::string_view str) noexcept { } -inline simdjson_result to_json_string(arm64::ondemand::document& x) noexcept { +inline simdjson_result to_json_string(haswell::ondemand::document& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } -inline simdjson_result to_json_string(arm64::ondemand::document_reference& x) noexcept { +inline simdjson_result to_json_string(haswell::ondemand::document_reference& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } -inline simdjson_result to_json_string(arm64::ondemand::value& x) noexcept { +inline simdjson_result to_json_string(haswell::ondemand::value& x) noexcept { /** * If we somehow receive a value that has already been consumed, * then the following code could be in trouble. E.g., we create * an array as needed, but if an array was already created, then * it could be bad. */ - using namespace arm64::ondemand; - arm64::ondemand::json_type t; + using namespace haswell::ondemand; + haswell::ondemand::json_type t; auto error = x.type().get(t); if(error != SUCCESS) { return error; } switch (t) { case json_type::array: { - arm64::ondemand::array array; + haswell::ondemand::array array; error = x.get_array().get(array); if(error) { return error; } return to_json_string(array); } case json_type::object: { - arm64::ondemand::object object; + haswell::ondemand::object object; error = x.get_object().get(object); if(error) { return error; } return to_json_string(object); @@ -34625,50 +63912,50 @@ inline simdjson_result to_json_string(arm64::ondemand::value& } } -inline simdjson_result to_json_string(arm64::ondemand::object& x) noexcept { +inline simdjson_result to_json_string(haswell::ondemand::object& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } -inline simdjson_result to_json_string(arm64::ondemand::array& x) noexcept { +inline simdjson_result to_json_string(haswell::ondemand::array& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } } // namespace simdjson -namespace simdjson { namespace arm64 { namespace ondemand { +namespace simdjson { namespace haswell { namespace ondemand { #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::value x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::value x) { std::string_view v; auto error = simdjson::to_json_string(x).get(v); if(error == simdjson::SUCCESS) { @@ -34677,12 +63964,12 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::va throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else -inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::value x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::value x) { std::string_view v; auto error = simdjson::to_json_string(x).get(v); if(error == simdjson::SUCCESS) { @@ -34694,7 +63981,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::va #endif #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::array value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::array value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -34703,12 +63990,12 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::ar throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else -inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::array value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::array value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -34720,7 +64007,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::ar #endif #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document& value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -34729,7 +64016,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::do throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document_reference& value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document_reference& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -34738,16 +64025,16 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::do throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else -inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document& value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -34759,7 +64046,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::do #endif #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::object value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::object value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -34768,12 +64055,12 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::ob throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else -inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::object value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::object value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -34783,12 +64070,12 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::ob } } #endif -}}} // namespace simdjson::arm64::ondemand +}}} // namespace simdjson::haswell::ondemand #endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H -/* end file simdjson/generic/ondemand/serialization-inl.h for arm64 */ -/* including simdjson/generic/ondemand/token_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/token_iterator-inl.h for arm64 */ +/* end file simdjson/generic/ondemand/serialization-inl.h for haswell */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -34799,7 +64086,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::ob /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace arm64 { +namespace haswell { namespace ondemand { simdjson_inline token_iterator::token_iterator( @@ -34828,6 +64115,11 @@ simdjson_inline uint32_t token_iterator::peek_length(token_position position) co return *(position+1) - *position; } +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? + *(position+1) - *(position) + : *(position+2) - *(position); +} simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { return &buf[*(_position+delta)]; } @@ -34865,498 +64157,22 @@ simdjson_inline bool token_iterator::operator<=(const token_iterator &other) con } } // namespace ondemand -} // namespace arm64 +} // namespace haswell } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::token_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/token_iterator-inl.h for arm64 */ -/* including simdjson/generic/ondemand/value-inl.h for arm64: #include "simdjson/generic/ondemand/value-inl.h" */ -/* begin file simdjson/generic/ondemand/value-inl.h for arm64 */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace arm64 { -namespace ondemand { - -simdjson_inline value::value(const value_iterator &_iter) noexcept - : iter{_iter} -{ -} -simdjson_inline value value::start(const value_iterator &iter) noexcept { - return iter; -} -simdjson_inline value value::resume(const value_iterator &iter) noexcept { - return iter; -} - -simdjson_inline simdjson_result value::get_array() noexcept { - return array::start(iter); -} -simdjson_inline simdjson_result value::get_object() noexcept { - return object::start(iter); -} -simdjson_inline simdjson_result value::start_or_resume_object() noexcept { - if (iter.at_start()) { - return get_object(); - } else { - return object::resume(iter); - } -} - -simdjson_inline simdjson_result value::get_raw_json_string() noexcept { - return iter.get_raw_json_string(); -} -simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { - return iter.get_string(allow_replacement); -} -template -simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { - return iter.get_string(receiver, allow_replacement); -} -simdjson_inline simdjson_result value::get_wobbly_string() noexcept { - return iter.get_wobbly_string(); -} -simdjson_inline simdjson_result value::get_double() noexcept { - return iter.get_double(); -} -simdjson_inline simdjson_result value::get_double_in_string() noexcept { - return iter.get_double_in_string(); -} -simdjson_inline simdjson_result value::get_uint64() noexcept { - return iter.get_uint64(); -} -simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { - return iter.get_uint64_in_string(); -} -simdjson_inline simdjson_result value::get_int64() noexcept { - return iter.get_int64(); -} -simdjson_inline simdjson_result value::get_int64_in_string() noexcept { - return iter.get_int64_in_string(); -} -simdjson_inline simdjson_result value::get_bool() noexcept { - return iter.get_bool(); -} -simdjson_inline simdjson_result value::is_null() noexcept { - return iter.is_null(); -} -template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } - -template simdjson_inline error_code value::get(T &out) noexcept { - return get().get(out); -} - -#if SIMDJSON_EXCEPTIONS -simdjson_inline value::operator array() noexcept(false) { - return get_array(); -} -simdjson_inline value::operator object() noexcept(false) { - return get_object(); -} -simdjson_inline value::operator uint64_t() noexcept(false) { - return get_uint64(); -} -simdjson_inline value::operator int64_t() noexcept(false) { - return get_int64(); -} -simdjson_inline value::operator double() noexcept(false) { - return get_double(); -} -simdjson_inline value::operator std::string_view() noexcept(false) { - return get_string(false); -} -simdjson_inline value::operator raw_json_string() noexcept(false) { - return get_raw_json_string(); -} -simdjson_inline value::operator bool() noexcept(false) { - return get_bool(); -} -#endif - -simdjson_inline simdjson_result value::begin() & noexcept { - return get_array().begin(); -} -simdjson_inline simdjson_result value::end() & noexcept { - return {}; -} -simdjson_inline simdjson_result value::count_elements() & noexcept { - simdjson_result answer; - auto a = get_array(); - answer = a.count_elements(); - // count_elements leaves you pointing inside the array, at the first element. - // We need to move back so that the user can create a new array (which requires that - // we point at '['). - iter.move_at_start(); - return answer; -} -simdjson_inline simdjson_result value::count_fields() & noexcept { - simdjson_result answer; - auto a = get_object(); - answer = a.count_fields(); - iter.move_at_start(); - return answer; -} -simdjson_inline simdjson_result value::at(size_t index) noexcept { - auto a = get_array(); - return a.at(index); -} - -simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { - return start_or_resume_object().find_field(key); -} -simdjson_inline simdjson_result value::find_field(const char *key) noexcept { - return start_or_resume_object().find_field(key); -} - -simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { - return start_or_resume_object().find_field_unordered(key); -} -simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { - return start_or_resume_object().find_field_unordered(key); -} - -simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { - return start_or_resume_object()[key]; -} -simdjson_inline simdjson_result value::operator[](const char *key) noexcept { - return start_or_resume_object()[key]; -} - -simdjson_inline simdjson_result value::type() noexcept { - return iter.type(); -} - -simdjson_inline simdjson_result value::is_scalar() noexcept { - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return ! ((this_type == json_type::array) || (this_type == json_type::object)); -} - -simdjson_inline bool value::is_negative() noexcept { - return iter.is_negative(); -} - -simdjson_inline simdjson_result value::is_integer() noexcept { - return iter.is_integer(); -} -simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { - return iter.get_number_type(); -} -simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { - return iter.get_number(); -} - -simdjson_inline std::string_view value::raw_json_token() noexcept { - return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); -} - -simdjson_inline simdjson_result value::raw_json() noexcept { - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: { - ondemand::array array; - SIMDJSON_TRY(get_array().get(array)); - return array.raw_json(); - } - case json_type::object: { - ondemand::object object; - SIMDJSON_TRY(get_object().get(object)); - return object.raw_json(); - } - default: - return raw_json_token(); - } -} - -simdjson_inline simdjson_result value::current_location() noexcept { - return iter.json_iter().current_location(); -} - -simdjson_inline int32_t value::current_depth() const noexcept{ - return iter.json_iter().depth(); -} - -simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: - return (*this).get_array().at_pointer(json_pointer); - case json_type::object: - return (*this).get_object().at_pointer(json_pointer); - default: - return INVALID_JSON_POINTER; - } -} - -} // namespace ondemand -} // namespace arm64 -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result( - arm64::ondemand::value &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ -} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { - if (error()) { return error(); } - return first.count_elements(); -} -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} -simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { - if (error()) { return error(); } - return first.at(index); -} -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_inline simdjson_result simdjson_result::end() & noexcept { - if (error()) { return error(); } - return {}; -} - -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { - if (error()) { return error(); } - return first.find_field(key); -} - -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} - -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { - if (error()) { return error(); } - return first[key]; -} - -simdjson_inline simdjson_result simdjson_result::get_array() noexcept { - if (error()) { return error(); } - return first.get_array(); -} -simdjson_inline simdjson_result simdjson_result::get_object() noexcept { - if (error()) { return error(); } - return first.get_object(); -} -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return error(); } - return first.get_uint64(); -} -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { - if (error()) { return error(); } - return first.get_uint64_in_string(); -} -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); -} -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { - if (error()) { return error(); } - return first.get_int64_in_string(); -} -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); -} -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { - if (error()) { return error(); } - return first.get_double_in_string(); -} -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(allow_replacement); -} -template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(receiver, allow_replacement); -} -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { - if (error()) { return error(); } - return first.get_wobbly_string(); -} -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); -} -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); -} -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { - if (error()) { return error(); } - return first.is_null(); -} - -template simdjson_inline simdjson_result simdjson_result::get() noexcept { - if (error()) { return error(); } - return first.get(); -} -template simdjson_inline error_code simdjson_result::get(T &out) noexcept { - if (error()) { return error(); } - return first.get(out); -} - -template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { - if (error()) { return error(); } - return std::move(first); -} -template<> simdjson_inline error_code simdjson_result::get(arm64::ondemand::value &out) noexcept { - if (error()) { return error(); } - out = first; - return SUCCESS; -} - -simdjson_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); -} -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); -} -simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); -} -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); -} -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); -} -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); -} -#if SIMDJSON_EXCEPTIONS -simdjson_inline simdjson_result::operator arm64::ondemand::array() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator arm64::ondemand::object() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator arm64::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -#endif - -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); -} - -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { - if (error()) { return error(); } - return first.raw_json(); -} - -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); -} - -simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { - if (error()) { return error(); } - return first.current_depth(); -} - -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); -} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H -/* end file simdjson/generic/ondemand/value-inl.h for arm64 */ -/* including simdjson/generic/ondemand/value_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/value_iterator-inl.h for arm64 */ +/* end file simdjson/generic/ondemand/token_iterator-inl.h for haswell */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for haswell */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -35365,13 +64181,13 @@ simdjson_inline simdjson_result simdjson_result value_iterator::find_ } else if (!is_open()) { #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. - // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif @@ -35608,7 +64424,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_ #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. - // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif @@ -35950,7 +64766,7 @@ simdjson_inline simdjson_result value_iterator::get_number() noexcept { } simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("is_root_integer"); uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -35965,8 +64781,8 @@ simdjson_inline simdjson_result value_iterator::is_root_integer(bool check return answer; } -simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { - auto max_len = peek_start_length(); +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); auto json = peek_root_scalar("number"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest @@ -35974,7 +64790,12 @@ simdjson_inline simdjson_result value_iterator::get_root_num uint8_t tmpbuf[1074+8+1+1]; tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); return NUMBER_ERROR; } auto answer = numberparsing::get_number_type(tmpbuf); @@ -35982,15 +64803,21 @@ simdjson_inline simdjson_result value_iterator::get_root_num return answer; } simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("number"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. uint8_t tmpbuf[1074+8+1+1]; tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); return NUMBER_ERROR; } number num; @@ -36022,7 +64849,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iter return raw_json_string(json+1); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("uint64"); uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -36038,7 +64865,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::g return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("uint64"); uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -36054,7 +64881,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::g return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -36071,7 +64898,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::ge return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -36088,7 +64915,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::ge return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest @@ -36108,7 +64935,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest @@ -36126,27 +64953,33 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); if(result) { // we have something that looks like a null. if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("null"); + } else if (json[0] == 'n') { + return incorrect_type_error("Not a null but starts with n"); } return result; } @@ -36212,6 +65045,9 @@ simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { return _json_iter->peek_length(start_position()); } +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); +} simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); @@ -36355,289 +65191,786 @@ simdjson_inline simdjson_result value_iterator::reset_object() noexcept { return started_object(); } -inline void value_iterator::assert_at_child() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); - SIMDJSON_ASSUME( _depth > 0 ); -} +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); +} + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} + +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); +} + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; +} + +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } +} + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/value_iterator-inl.h for haswell */ + + +/* end file simdjson/generic/ondemand/amalgamated.h for haswell */ +/* including simdjson/haswell/end.h: #include "simdjson/haswell/end.h" */ +/* begin file simdjson/haswell/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "haswell" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/haswell/end.h */ + +#endif // SIMDJSON_HASWELL_ONDEMAND_H +/* end file simdjson/haswell/ondemand.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) +/* including simdjson/icelake/ondemand.h: #include "simdjson/icelake/ondemand.h" */ +/* begin file simdjson/icelake/ondemand.h */ +#ifndef SIMDJSON_ICELAKE_ONDEMAND_H +#define SIMDJSON_ICELAKE_ONDEMAND_H + +/* including simdjson/icelake/begin.h: #include "simdjson/icelake/begin.h" */ +/* begin file simdjson/icelake/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "icelake" */ +#define SIMDJSON_IMPLEMENTATION icelake +/* including simdjson/icelake/base.h: #include "simdjson/icelake/base.h" */ +/* begin file simdjson/icelake/base.h */ +#ifndef SIMDJSON_ICELAKE_BASE_H +#define SIMDJSON_ICELAKE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +/** + * Implementation for Icelake (Intel AVX512). + */ +namespace icelake { + +class implementation; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BASE_H +/* end file simdjson/icelake/base.h */ +/* including simdjson/icelake/intrinsics.h: #include "simdjson/icelake/intrinsics.h" */ +/* begin file simdjson/icelake/intrinsics.h */ +#ifndef SIMDJSON_ICELAKE_INTRINSICS_H +#define SIMDJSON_ICELAKE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// Important: we need the AVX-512 headers: +#include +#include +#include +#include +#include +#include +#include +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); + +#endif // SIMDJSON_ICELAKE_INTRINSICS_H +/* end file simdjson/icelake/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt,popcnt") +#endif + +/* including simdjson/icelake/bitmanipulation.h: #include "simdjson/icelake/bitmanipulation.h" */ +/* begin file simdjson/icelake/bitmanipulation.h */ +#ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H +#define SIMDJSON_ICELAKE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H +/* end file simdjson/icelake/bitmanipulation.h */ +/* including simdjson/icelake/bitmask.h: #include "simdjson/icelake/bitmask.h" */ +/* begin file simdjson/icelake/bitmask.h */ +#ifndef SIMDJSON_ICELAKE_BITMASK_H +#define SIMDJSON_ICELAKE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMASK_H +/* end file simdjson/icelake/bitmask.h */ +/* including simdjson/icelake/simd.h: #include "simdjson/icelake/simd.h" */ +/* begin file simdjson/icelake/simd.h */ +#ifndef SIMDJSON_ICELAKE_SIMD_H +#define SIMDJSON_ICELAKE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if defined(__GNUC__) && !defined(__clang__) +#if __GNUC__ == 8 +#define SIMDJSON_GCC8 1 +#endif // __GNUC__ == 8 +#endif // defined(__GNUC__) && !defined(__clang__) + +#if SIMDJSON_GCC8 +/** + * GCC 8 fails to provide _mm512_set_epi8. We roll our own. + */ +inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { + return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), + uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), + uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), + uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), + uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), + uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), + uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), + uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); +} +#endif // SIMDJSON_GCC8 + + + +namespace simdjson { +namespace icelake { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m512i value; + + // Zero constructor + simdjson_inline base() : value{__m512i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m512i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m512i&() const { return this->value; } + simdjson_inline operator __m512i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m512i _value) : base>(_value) {} + + friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { + return _mm512_cmpeq_epi8_mask(lhs, rhs); + } + + static const int SIZE = sizeof(base::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) + constexpr int shift = 16 - N; + return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m512i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } + static simdjson_inline simd8 load(const T values[64]) { + return _mm512_loadu_si512(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m512i _value) : base8(_value) {} -inline void value_iterator::assert_at_root() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth == 1 ); -} + // Store to array + simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } -inline void value_iterator::assert_at_non_root_start() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth > 1 ); -} + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } -inline void value_iterator::assert_is_valid() const noexcept { - SIMDJSON_ASSUME( _json_iter != nullptr ); -} + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } -simdjson_inline bool value_iterator::is_valid() const noexcept { - return _json_iter != nullptr; -} + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm512_shuffle_epi8(lookup_table, *this); + } -simdjson_inline simdjson_result value_iterator::type() const noexcept { - switch (*peek_start()) { - case '{': - return json_type::object; - case '[': - return json_type::array; - case '"': - return json_type::string; - case 'n': - return json_type::null; - case 't': case 'f': - return json_type::boolean; - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return json_type::number; - default: - return TAPE_ERROR; - } -} + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint64_t mask, L * output) const { + // we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability + // (AMD Zen4 has terrible performance with it, it is effectively broken) + // _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + __m512i compressed = _mm512_maskz_compress_epi8(~mask, *this); + _mm512_storeu_si512(output, compressed); // could use a mask + } -simdjson_inline token_position value_iterator::start_position() const noexcept { - return _start_position; -} + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; -simdjson_inline token_position value_iterator::position() const noexcept { - return _json_iter->position(); -} + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, + int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, + int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, + int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, + int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} -simdjson_inline token_position value_iterator::end_position() const noexcept { - return _json_iter->end_position(); -} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -simdjson_inline token_position value_iterator::last_position() const noexcept { - return _json_iter->last_position(); -} + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } -simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { - return _json_iter->report_error(error, message); -} + simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } + }; -} // namespace ondemand -} // namespace arm64 -} // namespace simdjson + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, + uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, + uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, + uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, + uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} -namespace simdjson { + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::value_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } -} // namespace simdjson + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } -#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/value_iterator-inl.h for arm64 */ -/* end file simdjson/generic/ondemand/amalgamated.h for arm64 */ -/* including simdjson/arm64/end.h: #include "simdjson/arm64/end.h" */ -/* begin file simdjson/arm64/end.h */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } -#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT -/* undefining SIMDJSON_IMPLEMENTATION from "arm64" */ -#undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/arm64/end.h */ + simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { + return !_mm512_test_epi8_mask(*this, *this); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } + }; -#endif // SIMDJSON_ARM64_ONDEMAND_H -/* end file simdjson/arm64/ondemand.h */ -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback) -/* including simdjson/fallback/ondemand.h: #include "simdjson/fallback/ondemand.h" */ -/* begin file simdjson/fallback/ondemand.h */ -#ifndef SIMDJSON_FALLBACK_ONDEMAND_H -#define SIMDJSON_FALLBACK_ONDEMAND_H + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; -/* including simdjson/fallback/begin.h: #include "simdjson/fallback/begin.h" */ -/* begin file simdjson/fallback/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "fallback" */ -#define SIMDJSON_IMPLEMENTATION fallback -/* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */ -/* begin file simdjson/fallback/base.h */ -#ifndef SIMDJSON_FALLBACK_BASE_H -#define SIMDJSON_FALLBACK_BASE_H + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} -namespace simdjson { -/** - * Fallback implementation (runs on any machine). - */ -namespace fallback { + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(mask, output); + return 64 - count_ones(mask); + } -class implementation; + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + } -} // namespace fallback -} // namespace simdjson + simdjson_inline simd8 reduce_or() const { + return this->chunks[0]; + } -#endif // SIMDJSON_FALLBACK_BASE_H -/* end file simdjson/fallback/base.h */ -/* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */ -/* begin file simdjson/fallback/bitmanipulation.h */ -#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H -#define SIMDJSON_FALLBACK_BITMANIPULATION_H + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask + ); + } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] == mask; + } -namespace simdjson { -namespace fallback { -namespace { + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return this->chunks[0] == other.chunks[0]; + } -#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) -static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { - unsigned long x0 = (unsigned long)x, top, bottom; - _BitScanForward(&top, (unsigned long)(x >> 32)); - _BitScanForward(&bottom, x0); - *ret = x0 ? bottom : 32 + top; - return x != 0; -} -static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { - unsigned long x1 = (unsigned long)(x >> 32), top, bottom; - _BitScanReverse(&top, x1); - _BitScanReverse(&bottom, (unsigned long)x); - *ret = x1 ? top + 32 : bottom; - return x != 0; -} -#endif + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] <= mask; + } + }; // struct simd8x64 -/* result might be undefined when input_num is zero */ -simdjson_inline int leading_zeroes(uint64_t input_num) { -#ifdef _MSC_VER - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif// _MSC_VER -} +} // namespace simd } // unnamed namespace -} // namespace fallback +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H -/* end file simdjson/fallback/bitmanipulation.h */ -/* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */ -/* begin file simdjson/fallback/stringparsing_defs.h */ -#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H -#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +#endif // SIMDJSON_ICELAKE_SIMD_H +/* end file simdjson/icelake/simd.h */ +/* including simdjson/icelake/stringparsing_defs.h: #include "simdjson/icelake/stringparsing_defs.h" */ +/* begin file simdjson/icelake/stringparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +#define SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace icelake { namespace { +using namespace simd; + // Holds backslashes and quotes locations. struct backslash_and_quote { public: - static constexpr uint32_t BYTES_PROCESSED = 1; + static constexpr uint32_t BYTES_PROCESSED = 64; simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - simdjson_inline bool has_quote_first() { return c == '"'; } - simdjson_inline bool has_backslash() { return c == '\\'; } - simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } - simdjson_inline int backslash_index() { return c == '\\' ? 0 : 1; } + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } - uint8_t c; + uint64_t bs_bits; + uint64_t quote_bits; }; // struct backslash_and_quote simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); // store to dest unconditionally - we can overwrite the bits we don't like later - dst[0] = src[0]; - return { src[0] }; + v.store(dst); + return { + static_cast(v == '\\'), // bs_bits + static_cast(v == '"'), // quote_bits + }; } } // unnamed namespace -} // namespace fallback +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H -/* end file simdjson/fallback/stringparsing_defs.h */ -/* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */ -/* begin file simdjson/fallback/numberparsing_defs.h */ -#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H -#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +#endif // SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +/* end file simdjson/icelake/stringparsing_defs.h */ +/* including simdjson/icelake/numberparsing_defs.h: #include "simdjson/icelake/numberparsing_defs.h" */ +/* begin file simdjson/icelake/numberparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +#define SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include - -#ifdef JSON_TEST_NUMBERS // for unit testing -void found_invalid_number(const uint8_t *buf); -void found_integer(int64_t result, const uint8_t *buf); -void found_unsigned_integer(uint64_t result, const uint8_t *buf); -void found_float(double result, const uint8_t *buf); -#endif - namespace simdjson { -namespace fallback { +namespace icelake { namespace numberparsing { -// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ -/** @private */ -static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) { - uint64_t val; - memcpy(&val, chars, sizeof(uint64_t)); - val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; - val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; - return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); -} - -/** @private */ static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - return parse_eight_digits_unrolled(reinterpret_cast(chars)); -} - -#if SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest } -#endif /** @private */ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { internal::value128 answer; #if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 +#if SIMDJSON_IS_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); answer.low = value1 * value2; #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 +#endif // SIMDJSON_IS_ARM64 #else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); @@ -36647,23 +65980,23 @@ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t } } // namespace numberparsing -} // namespace fallback +} // namespace icelake } // namespace simdjson #define SIMDJSON_SWAR_NUMBER_PARSING 1 -#endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H -/* end file simdjson/fallback/numberparsing_defs.h */ -/* end file simdjson/fallback/begin.h */ -/* including simdjson/generic/ondemand/amalgamated.h for fallback: #include "simdjson/generic/ondemand/amalgamated.h" */ -/* begin file simdjson/generic/ondemand/amalgamated.h for fallback */ +#endif // SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +/* end file simdjson/icelake/numberparsing_defs.h */ +/* end file simdjson/icelake/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for icelake: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for icelake */ #if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) #error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! #endif // Stuff other things depend on -/* including simdjson/generic/ondemand/base.h for fallback: #include "simdjson/generic/ondemand/base.h" */ -/* begin file simdjson/generic/ondemand/base.h for fallback */ +/* including simdjson/generic/ondemand/base.h for icelake: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -36672,7 +66005,7 @@ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace icelake { /** * A fast, simple, DOM-like interface that parses JSON as you use it. * @@ -36683,8 +66016,8 @@ namespace ondemand { /** Represents the depth of a JSON value (number of nested arrays/objects). */ using depth_t = int32_t; -/** @copydoc simdjson::fallback::number_type */ -using number_type = simdjson::fallback::number_type; +/** @copydoc simdjson::icelake::number_type */ +using number_type = simdjson::icelake::number_type; /** @private Position in the JSON buffer indexes */ using token_position = const uint32_t *; @@ -36707,13 +66040,139 @@ class value; class value_iterator; } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H -/* end file simdjson/generic/ondemand/base.h for fallback */ -/* including simdjson/generic/ondemand/value_iterator.h for fallback: #include "simdjson/generic/ondemand/value_iterator.h" */ -/* begin file simdjson/generic/ondemand/value_iterator.h for fallback */ +/* end file simdjson/generic/ondemand/base.h for icelake */ +/* including simdjson/generic/ondemand/deserialize.h for icelake: #include "simdjson/generic/ondemand/deserialize.h" */ +/* begin file simdjson/generic/ondemand/deserialize.h for icelake */ +#if SIMDJSON_SUPPORTS_DESERIALIZATION + +#ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_ONDEMAND_DESERIALIZE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +namespace simdjson { + +namespace tag_invoke_fn_ns { +void tag_invoke(); + +struct tag_invoke_fn { + template + requires requires(Tag tag, Args &&...args) { + tag_invoke(std::forward(tag), std::forward(args)...); + } + constexpr auto operator()(Tag tag, Args &&...args) const + noexcept(noexcept(tag_invoke(std::forward(tag), + std::forward(args)...))) + -> decltype(tag_invoke(std::forward(tag), + std::forward(args)...)) { + return tag_invoke(std::forward(tag), std::forward(args)...); + } +}; +} // namespace tag_invoke_fn_ns + +inline namespace tag_invoke_ns { +inline constexpr tag_invoke_fn_ns::tag_invoke_fn tag_invoke = {}; +} // namespace tag_invoke_ns + +template +concept tag_invocable = requires(Tag tag, Args... args) { + tag_invoke(std::forward(tag), std::forward(args)...); +}; + +template +concept nothrow_tag_invocable = + tag_invocable && requires(Tag tag, Args... args) { + { + tag_invoke(std::forward(tag), std::forward(args)...) + } noexcept; + }; + +template +using tag_invoke_result = + std::invoke_result; + +template +using tag_invoke_result_t = + std::invoke_result_t; + +template using tag_t = std::decay_t; + + +struct deserialize_tag; + +/// These types are deserializable in a built-in way +template struct is_builtin_deserializable : std::false_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; + +template +concept is_builtin_deserializable_v = is_builtin_deserializable::value; + +template +concept custom_deserializable = tag_invocable; + +template +concept deserializable = custom_deserializable || is_builtin_deserializable_v; + +template +concept nothrow_custom_deserializable = nothrow_tag_invocable; + +// built-in types are noexcept and if an error happens, the value simply gets ignored and the error is returned. +template +concept nothrow_deserializable = nothrow_custom_deserializable || is_builtin_deserializable_v; + +/// Deserialize Tag +inline constexpr struct deserialize_tag { + using value_type = icelake::ondemand::value; + using document_type = icelake::ondemand::document; + using document_reference_type = icelake::ondemand::document_reference; + + // Customization Point for value + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for document + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for document reference + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + +} deserialize{}; + +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_DESERIALIZE_H +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + +/* end file simdjson/generic/ondemand/deserialize.h for icelake */ +/* including simdjson/generic/ondemand/value_iterator.h for icelake: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -36723,7 +66182,7 @@ class value_iterator; /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { /** @@ -36841,7 +66300,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -36851,7 +66311,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -36973,7 +66434,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -36983,7 +66445,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -37091,6 +66554,7 @@ class value_iterator { simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; simdjson_inline const uint8_t *peek_start() const noexcept; simdjson_inline uint32_t peek_start_length() const noexcept; + simdjson_inline uint32_t peek_root_length() const noexcept; /** * The general idea of the advance_... methods and the peek_* methods @@ -37185,18 +66649,19 @@ class value_iterator { friend class object; friend class array; friend class value; + friend class field; }; // value_iterator } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public fallback::implementation_simdjson_result_base { +struct simdjson_result : public icelake::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(fallback::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(icelake::ondemand::value_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; @@ -37204,9 +66669,9 @@ struct simdjson_result : public fallback::im } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H -/* end file simdjson/generic/ondemand/value_iterator.h for fallback */ -/* including simdjson/generic/ondemand/value.h for fallback: #include "simdjson/generic/ondemand/value.h" */ -/* begin file simdjson/generic/ondemand/value.h for fallback */ +/* end file simdjson/generic/ondemand/value_iterator.h for icelake */ +/* including simdjson/generic/ondemand/value.h for icelake: #include "simdjson/generic/ondemand/value.h" */ +/* begin file simdjson/generic/ondemand/value.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -37214,12 +66679,15 @@ struct simdjson_result : public fallback::im /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include + namespace simdjson { -namespace fallback { -namespace ondemand { +namespace icelake { +namespace ondemand { /** * An ephemeral JSON value returned during iteration. It is only valid for as long as you do * not access more data in the JSON document. @@ -37244,15 +66712,21 @@ class value { * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ - template simdjson_inline simdjson_result get() noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); + template + simdjson_inline simdjson_result get() +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; } + /** * Get this value as the given type. * @@ -37262,7 +66736,38 @@ class value { * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ - template simdjson_inline error_code get(T &out) noexcept; + template + simdjson_inline error_code get(T &out) +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + #if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif + } /** * Cast this JSON value to an array. @@ -37338,6 +66843,17 @@ class value { * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * + * In some instances, you may want to allow replacement of invalid Unicode sequences. + * You may do so by passing the allow_replacement parameter as true. In the following + * example, the string "431924697b\udff0L\u0001Y" is not valid Unicode. By passing true + * to get_string, we allow the replacement of the invalid Unicode sequences with the Unicode + * replacement character (U+FFFD). + * + * simdjson::ondemand::parser parser; + * auto json = R"({"deviceId":"431924697b\udff0L\u0001Y"})"_padded; + * simdjson::ondemand::document doc = parser.iterate(json); + * auto view = doc["deviceId"].get_string(true); + * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. @@ -37403,6 +66919,17 @@ class value { simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.). + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); /** * Cast this JSON value to an array. * @@ -37566,7 +67093,7 @@ class value { * that only one field is returned. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field as not there when they are not in order). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. @@ -37578,6 +67105,7 @@ class value { simdjson_inline simdjson_result operator[](std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_result operator[](int) noexcept = delete; /** * Get the type of this JSON value. It does not validate or consume the value. @@ -37602,6 +67130,13 @@ class value { * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the value is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; /** * Checks whether the value is a negative number. @@ -37634,10 +67169,12 @@ class value { * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have - * an integer greater or equal to 9223372036854775808 + * an integer greater or equal to 9223372036854775808. * get_number_type() is number_type::signed_integer if we have an - * integer that is less than 9223372036854775808 - * Otherwise, get_number_type() has value number_type::floating_point_number + * integer that is less than 9223372036854775808. + * get_number_type() is number_type::big_integer for integers that do not fit in 64 bits, + * in which case the digit_count is set to the length of the big integer string. + * Otherwise, get_number_type() has value number_type::floating_point_number. * * This function requires processing the number string, but it is expected * to be faster than get_number().get_number_type() because it is does not @@ -37664,6 +67201,8 @@ class value { * You can recover the value by calling number.get_uint64() and you * have that number.is_uint64() is true. * + * For integers that do not fit in 64 bits, the function returns BIGINT_ERROR error code. + * * Otherwise, number.get_number_type() has value number_type::floating_point_number * and we have a binary64 number. * You can recover the value by calling number.get_double() and you @@ -37679,7 +67218,6 @@ class value { */ simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; - /** * Get the raw JSON for this token. * @@ -37777,6 +67315,20 @@ class value { */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + + protected: /** * Create a value. @@ -37816,23 +67368,24 @@ class value { friend class object; friend struct simdjson_result; friend struct simdjson_result; + friend class field; }; } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public fallback::implementation_simdjson_result_base { +struct simdjson_result : public icelake::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(fallback::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(icelake::ondemand::value &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; - simdjson_inline simdjson_result get_array() noexcept; - simdjson_inline simdjson_result get_object() noexcept; + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; @@ -37844,7 +67397,7 @@ struct simdjson_result : public fallback::implementat template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; - simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result is_null() noexcept; @@ -37853,20 +67406,22 @@ struct simdjson_result : public fallback::implementat template simdjson_inline error_code get(T &out) noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_inline operator fallback::ondemand::array() noexcept(false); - simdjson_inline operator fallback::ondemand::object() noexcept(false); + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator icelake::ondemand::array() noexcept(false); + simdjson_inline operator icelake::ondemand::object() noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); - simdjson_inline operator fallback::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator icelake::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; - simdjson_inline simdjson_result at(size_t index) noexcept; - simdjson_inline simdjson_result begin() & noexcept; - simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). @@ -37888,9 +67443,9 @@ struct simdjson_result : public fallback::implementat * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ - simdjson_inline simdjson_result find_field(std::string_view key) noexcept; - /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ - simdjson_inline simdjson_result find_field(const char *key) noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. @@ -37906,18 +67461,19 @@ struct simdjson_result : public fallback::implementat * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field as not there when they are not in order). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ - simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; - /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; - /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_inline simdjson_result operator[](std::string_view key) noexcept; - /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_result operator[](int) noexcept = delete; /** * Get the type of this JSON value. @@ -37926,12 +67482,13 @@ struct simdjson_result : public fallback::implementat * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). */ - simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; - simdjson_inline simdjson_result get_number_type() noexcept; - simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; @@ -37941,15 +67498,16 @@ struct simdjson_result : public fallback::implementat simdjson_inline simdjson_result current_location() noexcept; /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ simdjson_inline simdjson_result current_depth() const noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H -/* end file simdjson/generic/ondemand/value.h for fallback */ -/* including simdjson/generic/ondemand/logger.h for fallback: #include "simdjson/generic/ondemand/logger.h" */ -/* begin file simdjson/generic/ondemand/logger.h for fallback */ +/* end file simdjson/generic/ondemand/value.h for icelake */ +/* including simdjson/generic/ondemand/logger.h for icelake: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -37958,7 +67516,7 @@ struct simdjson_result : public fallback::implementat /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { // Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical @@ -38004,13 +67562,13 @@ static inline void log_error(const value_iterator &iter, const char *error, cons } // namespace logger } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H -/* end file simdjson/generic/ondemand/logger.h for fallback */ -/* including simdjson/generic/ondemand/token_iterator.h for fallback: #include "simdjson/generic/ondemand/token_iterator.h" */ -/* begin file simdjson/generic/ondemand/token_iterator.h for fallback */ +/* end file simdjson/generic/ondemand/logger.h for icelake */ +/* including simdjson/generic/ondemand/token_iterator.h for icelake: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -38021,7 +67579,7 @@ static inline void log_error(const value_iterator &iter, const char *error, cons /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { /** @@ -38060,7 +67618,7 @@ class token_iterator { * 1 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** @@ -38090,7 +67648,14 @@ class token_iterator { * @param position The position of the token. */ simdjson_inline uint32_t peek_length(token_position position) const noexcept; - + /** + * Get the maximum length of the JSON text for a root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token (start of the document). + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; /** * Return the current index. */ @@ -38145,15 +67710,15 @@ class token_iterator { }; } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public fallback::implementation_simdjson_result_base { +struct simdjson_result : public icelake::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(fallback::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(icelake::ondemand::token_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private @@ -38162,9 +67727,9 @@ struct simdjson_result : public fallback::im } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H -/* end file simdjson/generic/ondemand/token_iterator.h for fallback */ -/* including simdjson/generic/ondemand/json_iterator.h for fallback: #include "simdjson/generic/ondemand/json_iterator.h" */ -/* begin file simdjson/generic/ondemand/json_iterator.h for fallback */ +/* end file simdjson/generic/ondemand/token_iterator.h for icelake */ +/* including simdjson/generic/ondemand/json_iterator.h for icelake: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -38175,7 +67740,7 @@ struct simdjson_result : public fallback::im /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { /** @@ -38313,7 +67878,7 @@ class json_iterator { * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used ... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** @@ -38341,7 +67906,7 @@ class json_iterator { * @param position The position of the token to retrieve. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used ... */ simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** @@ -38352,13 +67917,21 @@ class json_iterator { * @param position The position of the token to retrieve. */ simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; /** * Get the JSON text for the last token in the document. * * This is not null-terminated; it is a view into the JSON. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used ... */ simdjson_inline const uint8_t *peek_last() const noexcept; @@ -38452,6 +68025,9 @@ class json_iterator { inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. @@ -38467,6 +68043,7 @@ class json_iterator { friend class raw_json_string; friend class parser; friend class value_iterator; + friend class field; template friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; template @@ -38474,15 +68051,15 @@ class json_iterator { }; // json_iterator } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public fallback::implementation_simdjson_result_base { +struct simdjson_result : public icelake::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(fallback::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(icelake::ondemand::json_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -38491,9 +68068,9 @@ struct simdjson_result : public fallback::imp } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H -/* end file simdjson/generic/ondemand/json_iterator.h for fallback */ -/* including simdjson/generic/ondemand/json_type.h for fallback: #include "simdjson/generic/ondemand/json_type.h" */ -/* begin file simdjson/generic/ondemand/json_type.h for fallback */ +/* end file simdjson/generic/ondemand/json_iterator.h for icelake */ +/* including simdjson/generic/ondemand/json_type.h for icelake: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -38504,7 +68081,7 @@ struct simdjson_result : public fallback::imp /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { /** @@ -38637,15 +68214,15 @@ inline std::ostream& operator<<(std::ostream& out, simdjson_result &t #endif } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public fallback::implementation_simdjson_result_base { +struct simdjson_result : public icelake::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(fallback::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(icelake::ondemand::json_type &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private @@ -38654,9 +68231,9 @@ struct simdjson_result : public fallback::impleme } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H -/* end file simdjson/generic/ondemand/json_type.h for fallback */ -/* including simdjson/generic/ondemand/raw_json_string.h for fallback: #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* begin file simdjson/generic/ondemand/raw_json_string.h for fallback */ +/* end file simdjson/generic/ondemand/json_type.h for icelake */ +/* including simdjson/generic/ondemand/raw_json_string.h for icelake: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -38666,7 +68243,7 @@ struct simdjson_result : public fallback::impleme /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { /** @@ -38842,30 +68419,30 @@ simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_js } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public fallback::implementation_simdjson_result_base { +struct simdjson_result : public icelake::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(fallback::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(icelake::ondemand::raw_json_string &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; - simdjson_inline simdjson_warn_unused simdjson_result unescape(fallback::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; - simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(fallback::ondemand::json_iterator &iter) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(icelake::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(icelake::ondemand::json_iterator &iter) const noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H -/* end file simdjson/generic/ondemand/raw_json_string.h for fallback */ -/* including simdjson/generic/ondemand/parser.h for fallback: #include "simdjson/generic/ondemand/parser.h" */ -/* begin file simdjson/generic/ondemand/parser.h for fallback */ +/* end file simdjson/generic/ondemand/raw_json_string.h for icelake */ +/* including simdjson/generic/ondemand/parser.h for icelake: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -38877,12 +68454,12 @@ struct simdjson_result : public fallback::i #include namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -38952,6 +68529,22 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * + * ### std::string references + * + * If you pass a mutable std::string reference (std::string&), the parser will seek to extend + * its capacity to SIMDJSON_PADDING bytes beyond the end of the string. + * + * Whenever you pass an std::string reference, the parser will access the bytes beyond the end of + * the string but before the end of the allocated memory (std::string::capacity()). + * If you are using a sanitizer that checks for reading uninitialized bytes or std::string's + * container-overflow checks, you may encounter sanitizer warnings. + * You can safely ignore these warnings. Or you can call simdjson::pad(std::string&) to pad the + * string with SIMDJSON_PADDING spaces: this function returns a simdjson::padding_string_view + * which can be be passed to the parser's iterate function: + * + * std::string json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"; + * document doc = parser.iterate(simdjson::pad(json)); + * * @param json The JSON to parse. * @param len The length of the JSON. * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). @@ -38966,6 +68559,9 @@ class parser { * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ @@ -39110,9 +68706,9 @@ class parser { simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). */ - simdjson_inline size_t capacity() const noexcept; + simdjson_pure simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ - simdjson_inline size_t max_capacity() const noexcept; + simdjson_pure simdjson_inline size_t max_capacity() const noexcept; simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). @@ -39120,7 +68716,7 @@ class parser { * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. */ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -39143,8 +68739,12 @@ class parser { * behavior of the parser for future operations. */ bool threaded{true}; + #else + /** + * When SIMDJSON_THREADS_ENABLED is not defined, the parser instance cannot use threads. + */ + bool threaded{false}; #endif - /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. * The result must be valid UTF-8. @@ -39192,9 +68792,20 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * the string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and usafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ - std::unique_ptr implementation{}; + std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; @@ -39208,15 +68819,15 @@ class parser { }; } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public fallback::implementation_simdjson_result_base { +struct simdjson_result : public icelake::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(fallback::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(icelake::ondemand::parser &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; @@ -39224,11 +68835,11 @@ struct simdjson_result : public fallback::implementa } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H -/* end file simdjson/generic/ondemand/parser.h for fallback */ +/* end file simdjson/generic/ondemand/parser.h for icelake */ // All other declarations -/* including simdjson/generic/ondemand/array.h for fallback: #include "simdjson/generic/ondemand/array.h" */ -/* begin file simdjson/generic/ondemand/array.h for fallback */ +/* including simdjson/generic/ondemand/array.h for icelake: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -39239,7 +68850,7 @@ struct simdjson_result : public fallback::implementa /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { /** @@ -39275,7 +68886,8 @@ class array { * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer - * safe to continue. + * safe to continue. Note that count_elements() does not validate the JSON values, + * only the structure of the array. * * To check that an array is empty, it is more performant to use * the is_empty() method. @@ -39331,6 +68943,22 @@ class array { * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. @@ -39402,25 +69030,26 @@ class array { }; } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public fallback::implementation_simdjson_result_base { +struct simdjson_result : public icelake::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(fallback::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(icelake::ondemand::array &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; - simdjson_inline simdjson_result begin() noexcept; - simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; inline simdjson_result count_elements() & noexcept; inline simdjson_result is_empty() & noexcept; inline simdjson_result reset() & noexcept; - simdjson_inline simdjson_result at(size_t index) noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; }; @@ -39428,9 +69057,9 @@ struct simdjson_result : public fallback::implementat } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H -/* end file simdjson/generic/ondemand/array.h for fallback */ -/* including simdjson/generic/ondemand/array_iterator.h for fallback: #include "simdjson/generic/ondemand/array_iterator.h" */ -/* begin file simdjson/generic/ondemand/array_iterator.h for fallback */ +/* end file simdjson/generic/ondemand/array.h for icelake */ +/* including simdjson/generic/ondemand/array_iterator.h for icelake: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -39442,7 +69071,7 @@ struct simdjson_result : public fallback::implementat namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { /** @@ -39502,15 +69131,15 @@ class array_iterator { }; } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public fallback::implementation_simdjson_result_base { +struct simdjson_result : public icelake::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(fallback::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(icelake::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -39518,28 +69147,31 @@ struct simdjson_result : public fallback::im // Iterator interface // - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. - simdjson_inline bool operator==(const simdjson_result &) const noexcept; - simdjson_inline bool operator!=(const simdjson_result &) const noexcept; - simdjson_inline simdjson_result &operator++() noexcept; + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H -/* end file simdjson/generic/ondemand/array_iterator.h for fallback */ -/* including simdjson/generic/ondemand/document.h for fallback: #include "simdjson/generic/ondemand/document.h" */ -/* begin file simdjson/generic/ondemand/document.h for fallback */ +/* end file simdjson/generic/ondemand/array_iterator.h for icelake */ +/* including simdjson/generic/ondemand/document.h for icelake: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { /** @@ -39710,22 +69342,39 @@ class document { * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ - template simdjson_inline simdjson_result get() & noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); + template + simdjson_inline simdjson_result get() & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "Cannot initialize the specified type."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; } - /** @overload template simdjson_result get() & noexcept */ - template simdjson_inline simdjson_result get() && noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); + /** + * @overload template simdjson_result get() & noexcept + * + * We disallow the use tag_invoke CPO on a moved document; it may create UB + * if user uses `ondemand::array` or `ondemand::object` in their custom type. + * + * The member function is still remains specialize-able for compatibility + * reasons, but we completely disallow its use when a tag_invoke customization + * is provided. + */ + template + simdjson_inline simdjson_result get() && +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(!std::is_same::value && !std::is_same::value, "You should never hold either an ondemand::array or ondemand::object without a corresponding ondemand::document being alive; that would be Undefined Behaviour."); + return static_cast(*this).get(); } /** @@ -39739,11 +69388,57 @@ class document { * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ - template simdjson_inline error_code get(T &out) & noexcept; + template + simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { +#if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + } + /** @overload template error_code get(T &out) & noexcept */ - template simdjson_inline error_code get(T &out) && noexcept; + template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.) + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() & noexcept(false); + template + explicit simdjson_deprecated simdjson_inline operator T() && noexcept(false); + /** * Cast this JSON value to an array. * @@ -39827,7 +69522,8 @@ class document { * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer - * safe to continue. + * safe to continue. Note that count_elements() does not validate the JSON values, + * only the structure of the array. */ simdjson_inline simdjson_result count_elements() & noexcept; /** @@ -39916,7 +69612,7 @@ class document { * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field was not there when they are not in order). * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. E.g., the array @@ -39939,6 +69635,7 @@ class document { simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; /** * Get the type of this JSON value. It does not validate or consume the value. @@ -39962,6 +69659,14 @@ class document { */ simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the document is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + /** * Checks whether the document is a negative number. * @@ -39989,9 +69694,11 @@ class document { * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have - * an integer greater or equal to 9223372036854775808 + * an integer greater or equal to 9223372036854775808 and no larger than 18446744073709551615. * get_number_type() is number_type::signed_integer if we have an - * integer that is less than 9223372036854775808 + * integer that is less than 9223372036854775808 and greater or equal to -9223372036854775808. + * get_number_type() is number_type::big_integer if we have an integer outside + * of those ranges (either larger than 18446744073709551615 or smaller than -9223372036854775808). * Otherwise, get_number_type() has value number_type::floating_point_number * * This function requires processing the number string, but it is expected @@ -40109,6 +69816,14 @@ class document { * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/\\u00E9") == 123 + * doc.at_pointer((const char*)u8"/\u00E9") returns an error (NO_SUCH_FIELD) + * * Note that at_pointer() automatically calls rewind between each call. Thus * all values, objects and arrays that you have created so far (including unescaped strings) * are invalidated. After calling at_pointer, you need to consume the result: string values @@ -40125,6 +69840,30 @@ class document { * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_path(".\\u00E9") == 123 + * doc.at_path((const char*)u8".\u00E9") returns an error (NO_SUCH_FIELD) + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing @@ -40165,6 +69904,11 @@ class document { /** * A document_reference is a thin wrapper around a document reference instance. + * The document_reference instances are used primarily/solely for streams of JSON + * documents. They differ from document instances when parsing a scalar value + * (a document that is not an array or an object). In the case of a document, + * we expect the document to be fully consumed. In the case of a document_reference, + * we allow trailing content. */ class document_reference { public: @@ -40190,10 +69934,82 @@ class document_reference { simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; + template + simdjson_inline simdjson_result get() & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "Cannot initialize the specified type."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + template + simdjson_inline simdjson_result get() && +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(!std::is_same::value && !std::is_same::value, "You should never hold either an ondemand::array or ondemand::object without a corresponding ondemand::document_reference being alive; that would be Undefined Behaviour."); + return static_cast(*this).get(); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { +#if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + } + + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; simdjson_inline operator document&() const noexcept; - #if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator array() & noexcept(false); simdjson_inline operator object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); @@ -40213,11 +70029,13 @@ class document_reference { simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; @@ -40227,25 +70045,27 @@ class document_reference { simdjson_inline simdjson_result get_number() noexcept; simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + private: document *doc{nullptr}; }; } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public fallback::implementation_simdjson_result_base { +struct simdjson_result : public icelake::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(fallback::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(icelake::ondemand::document &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; - simdjson_inline simdjson_result get_array() & noexcept; - simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; @@ -40256,52 +70076,56 @@ struct simdjson_result : public fallback::implemen template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; - simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; - simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & noexcept; - template simdjson_inline simdjson_result get() && noexcept; + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; - #if SIMDJSON_EXCEPTIONS - simdjson_inline operator fallback::ondemand::array() & noexcept(false); - simdjson_inline operator fallback::ondemand::object() & noexcept(false); + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator icelake::ondemand::array() & noexcept(false); + simdjson_inline operator icelake::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); - simdjson_inline operator fallback::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator icelake::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); - simdjson_inline operator fallback::ondemand::value() noexcept(false); + simdjson_inline operator icelake::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; - simdjson_inline simdjson_result at(size_t index) & noexcept; - simdjson_inline simdjson_result begin() & noexcept; - simdjson_inline simdjson_result end() & noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field(const char *key) & noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_inline simdjson_result operator[](const char *key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; - simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; simdjson_inline bool at_end() const noexcept; simdjson_inline bool is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; - simdjson_inline simdjson_result get_number_type() noexcept; - simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; }; @@ -40312,14 +70136,14 @@ struct simdjson_result : public fallback::implemen namespace simdjson { template<> -struct simdjson_result : public fallback::implementation_simdjson_result_base { +struct simdjson_result : public icelake::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(fallback::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result(icelake::ondemand::document_reference value, error_code error) noexcept; simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; - simdjson_inline simdjson_result get_array() & noexcept; - simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; @@ -40330,54 +70154,64 @@ struct simdjson_result : public fallback template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; - simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; - simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_inline operator fallback::ondemand::array() & noexcept(false); - simdjson_inline operator fallback::ondemand::object() & noexcept(false); + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator icelake::ondemand::array() & noexcept(false); + simdjson_inline operator icelake::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); - simdjson_inline operator fallback::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator icelake::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); - simdjson_inline operator fallback::ondemand::value() noexcept(false); + simdjson_inline operator icelake::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; - simdjson_inline simdjson_result at(size_t index) & noexcept; - simdjson_inline simdjson_result begin() & noexcept; - simdjson_inline simdjson_result end() & noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field(const char *key) & noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_inline simdjson_result operator[](const char *key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; - simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; - simdjson_inline simdjson_result get_number_type() noexcept; - simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H -/* end file simdjson/generic/ondemand/document.h for fallback */ -/* including simdjson/generic/ondemand/document_stream.h for fallback: #include "simdjson/generic/ondemand/document_stream.h" */ -/* begin file simdjson/generic/ondemand/document_stream.h for fallback */ +/* end file simdjson/generic/ondemand/document.h for icelake */ +/* including simdjson/generic/ondemand/document_stream.h for icelake: #include "simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -40395,7 +70229,7 @@ struct simdjson_result : public fallback #endif namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED @@ -40502,10 +70336,9 @@ class document_stream { class iterator { public: using value_type = simdjson_result; - using reference = value_type; - + using reference = simdjson_result; + using pointer = void; using difference_type = std::ptrdiff_t; - using iterator_category = std::input_iterator_tag; /** @@ -40515,7 +70348,7 @@ class document_stream { /** * Get the current document (or error). */ - simdjson_inline simdjson_result operator*() noexcept; + simdjson_inline reference operator*() noexcept; /** * Advance to the next document (prefix). */ @@ -40697,18 +70530,18 @@ class document_stream { friend class document; friend class json_iterator; friend struct simdjson_result; - friend struct internal::simdjson_result_base; + friend struct simdjson::internal::simdjson_result_base; }; // document_stream } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public fallback::implementation_simdjson_result_base { +struct simdjson_result : public icelake::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(fallback::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(icelake::ondemand::document_stream &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; @@ -40716,9 +70549,9 @@ struct simdjson_result : public fallback::i } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H -/* end file simdjson/generic/ondemand/document_stream.h for fallback */ -/* including simdjson/generic/ondemand/field.h for fallback: #include "simdjson/generic/ondemand/field.h" */ -/* begin file simdjson/generic/ondemand/field.h for fallback */ +/* end file simdjson/generic/ondemand/document_stream.h for icelake */ +/* including simdjson/generic/ondemand/field.h for icelake: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -40730,7 +70563,7 @@ struct simdjson_result : public fallback::i /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { /** @@ -40757,12 +70590,39 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; + /** + * Get the unprocessed key as a string_view. This includes the quotes and may include + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). + */ + simdjson_inline std::string_view key_raw_json_token() const noexcept; + /** + * Get the key as a string_view. This does not include the quotes and + * the string is unprocessed key so it may contain escape characters + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. + */ + simdjson_inline std::string_view escaped_key() const noexcept; /** * Get the field value. */ @@ -40781,29 +70641,33 @@ class field : public std::pair { }; } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public fallback::implementation_simdjson_result_base { +struct simdjson_result : public icelake::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(fallback::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(icelake::ondemand::field &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; - simdjson_inline simdjson_result key() noexcept; - simdjson_inline simdjson_result value() noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result key_raw_json_token() noexcept; + simdjson_inline simdjson_result escaped_key() noexcept; + simdjson_inline simdjson_result value() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H -/* end file simdjson/generic/ondemand/field.h for fallback */ -/* including simdjson/generic/ondemand/object.h for fallback: #include "simdjson/generic/ondemand/object.h" */ -/* begin file simdjson/generic/ondemand/object.h for fallback */ +/* end file simdjson/generic/ondemand/field.h for icelake */ +/* including simdjson/generic/ondemand/object.h for icelake: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -40814,7 +70678,7 @@ struct simdjson_result : public fallback::implementat /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { /** @@ -40862,6 +70726,8 @@ class object { * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() * is an error. * + * If you expect to have keys with escape characters, please review our documentation. + * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ @@ -40883,7 +70749,7 @@ class object { * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field was not there when they are not in order). * * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. @@ -40899,6 +70765,8 @@ class object { * You are expected to access keys only once. You should access the value corresponding to a key * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. * + * If you expect to have keys with escape characters, please review our documentation. + * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ @@ -40946,11 +70814,24 @@ class object { */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -41012,27 +70893,29 @@ class object { }; } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public fallback::implementation_simdjson_result_base { +struct simdjson_result : public icelake::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(fallback::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(icelake::ondemand::object &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; - simdjson_inline simdjson_result begin() noexcept; - simdjson_inline simdjson_result end() noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; @@ -41043,9 +70926,9 @@ struct simdjson_result : public fallback::implementa } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H -/* end file simdjson/generic/ondemand/object.h for fallback */ -/* including simdjson/generic/ondemand/object_iterator.h for fallback: #include "simdjson/generic/ondemand/object_iterator.h" */ -/* begin file simdjson/generic/ondemand/object_iterator.h for fallback */ +/* end file simdjson/generic/ondemand/object.h for icelake */ +/* including simdjson/generic/ondemand/object_iterator.h for icelake: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -41056,7 +70939,7 @@ struct simdjson_result : public fallback::implementa /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { class object_iterator { @@ -41097,15 +70980,15 @@ class object_iterator { }; } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public fallback::implementation_simdjson_result_base { +struct simdjson_result : public icelake::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(fallback::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(icelake::ondemand::object_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -41114,21 +70997,21 @@ struct simdjson_result : public fallback::i // // Reads key and value, yielding them to the user. - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. // Assumes it's being compared with the end. true if depth < iter->depth. - simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator==(const simdjson_result &) const noexcept; // Assumes it's being compared with the end. true if depth >= iter->depth. - simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; // Checks for ']' and ',' - simdjson_inline simdjson_result &operator++() noexcept; + simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H -/* end file simdjson/generic/ondemand/object_iterator.h for fallback */ -/* including simdjson/generic/ondemand/serialization.h for fallback: #include "simdjson/generic/ondemand/serialization.h" */ -/* begin file simdjson/generic/ondemand/serialization.h for fallback */ +/* end file simdjson/generic/ondemand/object_iterator.h for icelake */ +/* including simdjson/generic/ondemand/serialization.h for icelake: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -41142,30 +71025,30 @@ namespace simdjson { * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ -inline simdjson_result to_json_string(fallback::ondemand::document& x) noexcept; +inline simdjson_result to_json_string(icelake::ondemand::document& x) noexcept; /** * Create a string-view instance out of a value instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. The value must * not have been accessed previously. It does not * validate the content. */ -inline simdjson_result to_json_string(fallback::ondemand::value& x) noexcept; +inline simdjson_result to_json_string(icelake::ondemand::value& x) noexcept; /** * Create a string-view instance out of an object instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ -inline simdjson_result to_json_string(fallback::ondemand::object& x) noexcept; +inline simdjson_result to_json_string(icelake::ondemand::object& x) noexcept; /** * Create a string-view instance out of an array instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ -inline simdjson_result to_json_string(fallback::ondemand::array& x) noexcept; -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(icelake::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); } // namespace simdjson /** @@ -41175,7 +71058,7 @@ inline simdjson_result to_json_string(simdjson_result x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. It does not @@ -41197,9 +71080,9 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. It does not @@ -41209,13 +71092,13 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); #endif -inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document_reference& value); +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document_reference& value); #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); #endif /** * Print JSON to an output stream. It does not @@ -41225,346 +71108,1127 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif -}}} // namespace simdjson::fallback::ondemand +}}} // namespace simdjson::icelake::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H +/* end file simdjson/generic/ondemand/serialization.h for icelake */ + +// Deserialization for standard types +/* including simdjson/generic/ondemand/std_deserialize.h for icelake: #include "simdjson/generic/ondemand/std_deserialize.h" */ +/* begin file simdjson/generic/ondemand/std_deserialize.h for icelake */ +#if SIMDJSON_SUPPORTS_DESERIALIZATION + +#ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_ONDEMAND_DESERIALIZE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +template +constexpr bool require_custom_serialization = false; + +////////////////////////////// +// Number deserialization +////////////////////////////// + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + using limits = std::numeric_limits; + + uint64_t x; + SIMDJSON_TRY(val.get_uint64().get(x)); + if (x > (limits::max)()) { + return NUMBER_OUT_OF_RANGE; + } + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + double x; + SIMDJSON_TRY(val.get_double().get(x)); + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + using limits = std::numeric_limits; + + int64_t x; + SIMDJSON_TRY(val.get_int64().get(x)); + if (x > (limits::max)() || x < (limits::min)()) { + return NUMBER_OUT_OF_RANGE; + } + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + +/** + * STL containers have several constructors including one that takes a single + * size argument. Thus, some compilers (Visual Studio) will not be able to + * disambiguate between the size and container constructor. Users should + * explicitly specify the type of the container as needed: e.g., + * doc.get>(). + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::value_type; + static_assert( + deserializable, + "The specified type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the container must default constructible."); + + icelake::ondemand::array arr; + SIMDJSON_TRY(val.get_array().get(arr)); + for (auto v : arr) { + if constexpr (concepts::returns_reference) { + if (auto const err = v.get().get(concepts::emplace_one(out)); + err) { + // If an error occurs, the empty element that we just inserted gets + // removed. We're not using a temp variable because if T is a heavy + // type, we want the valid path to be the fast path and the slow path be + // the path that has errors in it. + if constexpr (requires { out.pop_back(); }) { + static_cast(out.pop_back()); + } + return err; + } + } else { + value_type temp; + if (auto const err = v.get().get(temp); err) { + return err; + } + concepts::emplace_one(out, std::move(temp)); + } + } + return SUCCESS; +} + + +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + icelake::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + + + + +/** + * This CPO (Customization Point Object) will help deserialize into + * smart pointers. + * + * If constructing T is nothrow, this conversion should be nothrow as well since + * we return MEMALLOC if we're not able to allocate memory instead of throwing + * the error message. + * + * @tparam T The type inside the smart pointer + * @tparam ValT document/value type + * @param val document/value + * @param out a reference to the smart pointer + * @return status of the conversion + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::element_type, ValT>) { + using element_type = typename std::remove_cvref_t::element_type; + + // For better error messages, don't use these as constraints on + // the tag_invoke CPO. + static_assert( + deserializable, + "The specified type inside the unique_ptr must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the unique_ptr must default constructible."); + + auto ptr = new (std::nothrow) element_type(); + if (ptr == nullptr) { + return MEMALLOC; + } + SIMDJSON_TRY(val.template get(*ptr)); + out.reset(ptr); + return SUCCESS; +} + +/** + * This CPO (Customization Point Object) will help deserialize into optional types. + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { + using value_type = typename std::remove_cvref_t::value_type; + + static_assert( + deserializable, + "The specified type inside the unique_ptr must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the unique_ptr must default constructible."); + + if (!out) { + out.emplace(); + } + SIMDJSON_TRY(val.template get(out.value())); + return SUCCESS; +} + +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_DESERIALIZE_H +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +/* end file simdjson/generic/ondemand/std_deserialize.h for icelake */ + +// Inline definitions +/* including simdjson/generic/ondemand/array-inl.h for icelake: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/jsonpathutil.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `{`. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth. at_start is always false. +// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. +// + +simdjson_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. + iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline simdjson_result array::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for icelake */ +/* including simdjson/generic/ondemand/array_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. + if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::array_iterator &&value +) noexcept + : icelake::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : icelake::implementation_simdjson_result_base({}, error) +{ +} -#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H -/* end file simdjson/generic/ondemand/serialization.h for fallback */ +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} -// Inline definitions -/* including simdjson/generic/ondemand/array-inl.h for fallback: #include "simdjson/generic/ondemand/array-inl.h" */ -/* begin file simdjson/generic/ondemand/array-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for icelake */ +/* including simdjson/generic/ondemand/value-inl.h for icelake: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { -// -// ### Live States -// -// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is -// always SUCCESS: -// -// - Start: This is the state when the array is first found and the iterator is just past the `{`. -// In this state, at_start == true. -// - Next: After we hand a scalar value to the user, or an array/object which they then fully -// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, -// depth == iter->depth, at_start == false, and error == SUCCESS. -// - Unfinished Business: When we hand an array/object to the user which they do not fully -// iterate over, we need to finish that iteration by skipping child values until we reach the -// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. -// -// ## Error States -// -// In error states, we will yield exactly one more value before stopping. iter->depth == depth -// and at_start is always false. We decrement after yielding the error, moving to the Finished -// state. -// -// - Chained Error: When the array iterator is part of an error chain--for example, in -// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an -// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and -// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. -// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, -// we flag that as an error and treat it exactly the same as a Chained Error. In this state, -// error == TAPE_ERROR, iter->depth == depth, and at_start == false. -// -// ## Terminal State -// -// The terminal state has iter->depth < depth. at_start is always false. -// -// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this -// by decrementing depth. In this state, iter->depth < depth, at_start == false, and -// error == SUCCESS. -// - -simdjson_inline array::array(const value_iterator &_iter) noexcept +simdjson_inline value::value(const value_iterator &_iter) noexcept : iter{_iter} { } +simdjson_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} -simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { - // We don't need to know if the array is empty to start iteration, but we do want to know if there - // is an error--thus `simdjson_unused`. - simdjson_unused bool has_value; - SIMDJSON_TRY( iter.start_array().get(has_value) ); - return array(iter); +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); } -simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { - simdjson_unused bool has_value; - SIMDJSON_TRY( iter.start_root_array().get(has_value) ); - return array(iter); +simdjson_inline simdjson_result value::get_object() noexcept { + return object::start(iter); } -simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { - bool has_value; - SIMDJSON_TRY(iter.started_array().get(has_value)); - return array(iter); +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } } -simdjson_inline simdjson_result array::begin() noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +template +simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { + return iter.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} + +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + + +template<> simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } +template<> simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } +template<> simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } +template<> simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } +template<> simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } +template<> simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } +template<> simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } +template<> simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } +template<> simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline value::operator T() noexcept(false) { + return get(); +} +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} #endif - return array_iterator(iter); + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); } -simdjson_inline simdjson_result array::end() noexcept { - return array_iterator(iter); +simdjson_inline simdjson_result value::end() & noexcept { + return {}; } -simdjson_inline error_code array::consume() noexcept { - auto error = iter.json_iter().skip_child(iter.depth()-1); - if(error) { iter.abandon(); } - return error; +simdjson_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); } -simdjson_inline simdjson_result array::raw_json() noexcept { - const uint8_t * starting_point{iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - // After 'consume()', we could be left pointing just beyond the document, but that - // is ok because we are not going to dereference the final pointer position, we just - // use it to compute the length in bytes. - const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline simdjson_result array::count_elements() & noexcept { - size_t count{0}; - // Important: we do not consume any of the values. - for(simdjson_unused auto v : *this) { count++; } - // The above loop will always succeed, but we want to report errors. - if(iter.error()) { return iter.error(); } - // We need to move back at the start because we expect users to iterate through - // the array after counting the number of elements. - iter.reset_array(); - return count; +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); } -SIMDJSON_POP_DISABLE_WARNINGS -simdjson_inline simdjson_result array::is_empty() & noexcept { - bool is_not_empty; - auto error = iter.reset_array().get(is_not_empty); +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); +} + +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); if(error) { return error; } - return !is_not_empty; + return ! ((this_type == json_type::array) || (this_type == json_type::object)); } -inline simdjson_result array::reset() & noexcept { - return iter.reset_array(); +simdjson_inline simdjson_result value::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); } -inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { - if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } - json_pointer = json_pointer.substr(1); - // - means "the append position" or "the element after the end of the array" - // We don't support this, because we're returning a real element, not a position. - if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } - // Read the array index - size_t array_index = 0; - size_t i; - for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { - uint8_t digit = uint8_t(json_pointer[i] - '0'); - // Check for non-digit in array index. If it's there, we're trying to get a field in an object - if (digit > 9) { return INCORRECT_TYPE; } - array_index = array_index*10 + digit; +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); +} + +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +simdjson_inline simdjson_result value::raw_json() noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: { + ondemand::array array; + SIMDJSON_TRY(get_array().get(array)); + return array.raw_json(); + } + case json_type::object: { + ondemand::object object; + SIMDJSON_TRY(get_object().get(object)); + return object.raw_json(); + } + default: + return raw_json_token(); } +} - // 0 followed by other digits is invalid - if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} - // Empty string is invalid; so is a "/" with no digits before it - if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" - // Get the child - auto child = at(array_index); - // If there is an error, it ends here - if(child.error()) { - return child; +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + return false; + } + return true; +} - // If there is a /, we're not done yet, call recursively. - if (i < json_pointer.length()) { - child = child.at_pointer(json_pointer.substr(i)); +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } + return INVALID_JSON_POINTER; } - return child; } -simdjson_inline simdjson_result array::at(size_t index) noexcept { - size_t i = 0; - for (auto value : *this) { - if (i == index) { return value; } - i++; - } - return INDEX_OUT_OF_BOUNDS; +simdjson_inline simdjson_result value::at_path(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + return {}; +} + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} + +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template<> simdjson_inline error_code simdjson_result::get(icelake::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; } -} // namespace ondemand -} // namespace fallback -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result( - fallback::ondemand::array &&value -) noexcept - : implementation_simdjson_result_base( - std::forward(value) - ) -{ +template simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); } -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept - : implementation_simdjson_result_base(error) -{ +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); } -simdjson_inline simdjson_result simdjson_result::begin() noexcept { +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { if (error()) { return error(); } - return first.begin(); + return std::move(first); } -simdjson_inline simdjson_result simdjson_result::end() noexcept { + +simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } - return first.end(); + return first.type(); } -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } - return first.count_elements(); + return first.is_scalar(); } -simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { if (error()) { return error(); } - return first.is_empty(); + return first.is_string(); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } - return first.at(index); + return first.is_negative(); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } - return first.at_pointer(json_pointer); + return first.is_integer(); } -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } - return first.raw_json(); + return first.get_number_type(); } -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H -/* end file simdjson/generic/ondemand/array-inl.h for fallback */ -/* including simdjson/generic/ondemand/array_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/array_iterator-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace fallback { -namespace ondemand { - -simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept - : iter{_iter} -{} - -simdjson_inline simdjson_result array_iterator::operator*() noexcept { - if (iter.error()) { iter.abandon(); return iter.error(); } - return value(iter.child()); +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); } -simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { - return !(*this != other); +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.get(); } -simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { - return iter.is_open(); +simdjson_inline simdjson_result::operator icelake::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_inline array_iterator &array_iterator::operator++() noexcept { - error_code error; - // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. - // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. - if (( error = iter.error() )) { return *this; } - if (( error = iter.skip_child() )) { return *this; } - if (( error = iter.has_next_element().error() )) { return *this; } - return *this; +simdjson_inline simdjson_result::operator icelake::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif -} // namespace ondemand -} // namespace fallback -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result( - fallback::ondemand::array_iterator &&value -) noexcept - : fallback::implementation_simdjson_result_base(std::forward(value)) -{ - first.iter.assert_is_valid(); +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); } -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : fallback::implementation_simdjson_result_base({}, error) -{ + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); } -simdjson_inline simdjson_result simdjson_result::operator*() noexcept { +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } - return *first; + return first.current_location(); } -simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return !error(); } - return first == other.first; + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); } -simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return error(); } - return first != other.first; + +simdjson_inline simdjson_result simdjson_result::at_pointer( + std::string_view json_pointer) noexcept { + if (error()) { + return error(); + } + return first.at_pointer(json_pointer); } -simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { - // Clear the error if there is one, so we don't yield it twice - if (error()) { second = SUCCESS; return *this; } - ++(first); - return *this; + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); } } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/array_iterator-inl.h for fallback */ -/* including simdjson/generic/ondemand/document-inl.h for fallback: #include "simdjson/generic/ondemand/document-inl.h" */ -/* begin file simdjson/generic/ondemand/document-inl.h for fallback */ +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for icelake */ +/* including simdjson/generic/ondemand/document-inl.h for icelake: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept @@ -41714,22 +72378,29 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } - -template simdjson_inline error_code document::get(T &out) & noexcept { - return get().get(out); -} -template simdjson_inline error_code document::get(T &out) && noexcept { - return std::forward(*this).get().get(out); -} +template<> simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } +template<> simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } +template<> simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } +template<> simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } +template<> simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } +template<> simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } +template<> simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } +template<> simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } +template<> simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } + +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } #if SIMDJSON_EXCEPTIONS +template +simdjson_deprecated simdjson_inline document::operator T() && noexcept(false) { return get(); } +template +simdjson_inline document::operator T() & noexcept(false) { return get(); } simdjson_inline document::operator array() & noexcept(false) { return get_array(); } simdjson_inline document::operator object() & noexcept(false) { return get_object(); } simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } @@ -41773,3509 +72444,3271 @@ simdjson_inline simdjson_result document::find_field(const char *key) & n return start_or_resume_object().find_field(key); } simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { - return start_or_resume_object().find_field_unordered(key); -} -simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { - return start_or_resume_object().find_field_unordered(key); -} -simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { - return start_or_resume_object()[key]; -} -simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { - return start_or_resume_object()[key]; -} - -simdjson_inline error_code document::consume() noexcept { - auto error = iter.skip_child(0); - if(error) { iter.abandon(); } - return error; -} - -simdjson_inline simdjson_result document::raw_json() noexcept { - auto _iter = get_root_value_iterator(); - const uint8_t * starting_point{_iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - // After 'consume()', we could be left pointing just beyond the document, but that - // is ok because we are not going to dereference the final pointer position, we just - // use it to compute the length in bytes. - const uint8_t * final_point{iter.unsafe_pointer()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); -} - -simdjson_inline simdjson_result document::type() noexcept { - return get_root_value_iterator().type(); -} - -simdjson_inline simdjson_result document::is_scalar() noexcept { - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return ! ((this_type == json_type::array) || (this_type == json_type::object)); -} - -simdjson_inline bool document::is_negative() noexcept { - return get_root_value_iterator().is_root_negative(); -} - -simdjson_inline simdjson_result document::is_integer() noexcept { - return get_root_value_iterator().is_root_integer(true); -} - -simdjson_inline simdjson_result document::get_number_type() noexcept { - return get_root_value_iterator().get_root_number_type(true); -} - -simdjson_inline simdjson_result document::get_number() noexcept { - return get_root_value_iterator().get_root_number(true); -} - - -simdjson_inline simdjson_result document::raw_json_token() noexcept { - auto _iter = get_root_value_iterator(); - return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); -} - -simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { - rewind(); // Rewind the document each time at_pointer is called - if (json_pointer.empty()) { - return this->get_value(); - } - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: - return (*this).get_array().at_pointer(json_pointer); - case json_type::object: - return (*this).get_object().at_pointer(json_pointer); - default: - return INVALID_JSON_POINTER; - } -} - -} // namespace ondemand -} // namespace fallback -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result( - fallback::ondemand::document &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base( - error - ) -{ -} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { - if (error()) { return error(); } - return first.count_elements(); -} -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { - if (error()) { return error(); } - return first.at(index); -} -simdjson_inline error_code simdjson_result::rewind() noexcept { - if (error()) { return error(); } - first.rewind(); - return SUCCESS; -} -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_inline simdjson_result simdjson_result::end() & noexcept { - return {}; -} -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { - if (error()) { return error(); } - return first.get_array(); -} -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { - if (error()) { return error(); } - return first.get_object(); -} -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return error(); } - return first.get_uint64(); -} -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { - if (error()) { return error(); } - return first.get_uint64_in_string(); -} -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); -} -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { - if (error()) { return error(); } - return first.get_int64_in_string(); -} -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); -} -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { - if (error()) { return error(); } - return first.get_double_in_string(); -} -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(allow_replacement); -} -template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(receiver, allow_replacement); -} -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { - if (error()) { return error(); } - return first.get_wobbly_string(); -} -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); -} -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); -} -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { - if (error()) { return error(); } - return first.get_value(); -} -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { - if (error()) { return error(); } - return first.is_null(); -} - -template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { - if (error()) { return error(); } - return first.get(); -} -template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first).get(); -} -template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { - if (error()) { return error(); } - return first.get(out); -} -template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { - if (error()) { return error(); } - return std::forward(first).get(out); -} - -template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first); -} -template<> simdjson_inline error_code simdjson_result::get(fallback::ondemand::document &out) & noexcept = delete; -template<> simdjson_inline error_code simdjson_result::get(fallback::ondemand::document &out) && noexcept { - if (error()) { return error(); } - out = std::forward(first); - return SUCCESS; -} - -simdjson_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); -} - -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); -} - - -simdjson_inline bool simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); -} - -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); -} - -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); -} - -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); -} - - -#if SIMDJSON_EXCEPTIONS -simdjson_inline simdjson_result::operator fallback::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator fallback::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator fallback::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + return start_or_resume_object().find_field_unordered(key); } -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); } -simdjson_inline simdjson_result::operator fallback::ondemand::value() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; } -#endif +simdjson_inline error_code document::consume() noexcept { + bool scalar = false; + auto error = is_scalar().get(scalar); + if(error) { return error; } + if(scalar) { + iter.return_current_and_advance(); + return SUCCESS; + } + error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } -simdjson_inline bool simdjson_result::at_end() const noexcept { - if (error()) { return error(); } - return first.at_end(); +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); } +simdjson_inline simdjson_result document::is_scalar() noexcept { + // For more speed, we could do: + // return iter.is_single_token(); + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} -simdjson_inline int32_t simdjson_result::current_depth() const noexcept { - if (error()) { return error(); } - return first.current_depth(); +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); } +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} -} // namespace simdjson +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} -namespace simdjson { -namespace fallback { -namespace ondemand { +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); +} -simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} -simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} -simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } -simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } -simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } -/** - * The document_reference instances are used primarily/solely for streams of JSON - * documents. - * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should - * give an error, so we check for trailing content. - * - * However, for streams of JSON documents, we want to be able to start from - * "321" "321" "321" - * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() - * successfully each time. - * - * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: - * this indicates that we allow trailing content. - */ -simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } -simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } -simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } -simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } -simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } -simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } -simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } -template -simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } -simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } -simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } -simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } -simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } -simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} -#if SIMDJSON_EXCEPTIONS -simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } -simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } -simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } -simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } -simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } -simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } -simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } -simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } -simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } -#endif -simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } -simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } -simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } -simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } -simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } -simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } -simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } -simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } -simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } -simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } -simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } -simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } -simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } -simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } -simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } -simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } -simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } -simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } -simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } -simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } -simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } -simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} -simdjson_inline document_reference::operator document&() const noexcept { return *doc; } +simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson - - namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::document_reference value, error_code error) - noexcept : implementation_simdjson_result_base(std::forward(value), error) {} - -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_inline error_code simdjson_result::rewind() noexcept { +simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } -simdjson_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); -} -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); -} -simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); -} -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); -} -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); -} -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); -} -#if SIMDJSON_EXCEPTIONS -simdjson_inline simdjson_result::operator fallback::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator fallback::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator fallback::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator fallback::ondemand::value() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -#endif -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { if (error()) { return error(); } - return first.current_location(); + return first.get(); } - -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { +template +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } - return first.raw_json_token(); + return std::forward(first).get(); } - -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { if (error()) { return error(); } - return first.at_pointer(json_pointer); -} - - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H -/* end file simdjson/generic/ondemand/document-inl.h for fallback */ -/* including simdjson/generic/ondemand/document_stream-inl.h for fallback: #include "simdjson/generic/ondemand/document_stream-inl.h" */ -/* begin file simdjson/generic/ondemand/document_stream-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -#include -#include - -namespace simdjson { -namespace fallback { -namespace ondemand { - -#ifdef SIMDJSON_THREADS_ENABLED - -inline void stage1_worker::finish() { - // After calling "run" someone would call finish() to wait - // for the end of the processing. - // This function will wait until either the thread has done - // the processing or, else, the destructor has been called. - std::unique_lock lock(locking_mutex); - cond_var.wait(lock, [this]{return has_work == false;}); -} - -inline stage1_worker::~stage1_worker() { - // The thread may never outlive the stage1_worker instance - // and will always be stopped/joined before the stage1_worker - // instance is gone. - stop_thread(); -} - -inline void stage1_worker::start_thread() { - std::unique_lock lock(locking_mutex); - if(thread.joinable()) { - return; // This should never happen but we never want to create more than one thread. - } - thread = std::thread([this]{ - while(true) { - std::unique_lock thread_lock(locking_mutex); - // We wait for either "run" or "stop_thread" to be called. - cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); - // If, for some reason, the stop_thread() method was called (i.e., the - // destructor of stage1_worker is called, then we want to immediately destroy - // the thread (and not do any more processing). - if(!can_work) { - break; - } - this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, - this->_next_batch_start); - this->has_work = false; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify "finish" - thread_lock.unlock(); - } - } - ); -} - - -inline void stage1_worker::stop_thread() { - std::unique_lock lock(locking_mutex); - // We have to make sure that all locks can be released. - can_work = false; - has_work = false; - cond_var.notify_all(); - lock.unlock(); - if(thread.joinable()) { - thread.join(); - } -} - -inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { - std::unique_lock lock(locking_mutex); - owner = ds; - _next_batch_start = next_batch_start; - stage1_thread_parser = stage1; - has_work = true; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify the thread lock that we have work - lock.unlock(); -} - -#endif // SIMDJSON_THREADS_ENABLED - -simdjson_inline document_stream::document_stream( - ondemand::parser &_parser, - const uint8_t *_buf, - size_t _len, - size_t _batch_size, - bool _allow_comma_separated -) noexcept - : parser{&_parser}, - buf{_buf}, - len{_len}, - batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, - allow_comma_separated{_allow_comma_separated}, - error{SUCCESS} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change - #endif -{ -#ifdef SIMDJSON_THREADS_ENABLED - if(worker.get() == nullptr) { - error = MEMALLOC; - } -#endif -} - -simdjson_inline document_stream::document_stream() noexcept - : parser{nullptr}, - buf{nullptr}, - len{0}, - batch_size{0}, - allow_comma_separated{false}, - error{UNINITIALIZED} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(false) - #endif -{ -} - -simdjson_inline document_stream::~document_stream() noexcept -{ - #ifdef SIMDJSON_THREADS_ENABLED - worker.reset(); - #endif -} - -inline size_t document_stream::size_in_bytes() const noexcept { - return len; -} - -inline size_t document_stream::truncated_bytes() const noexcept { - if(error == CAPACITY) { return len - batch_start; } - return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; -} - -simdjson_inline document_stream::iterator::iterator() noexcept - : stream{nullptr}, finished{true} { -} - -simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept - : stream{_stream}, finished{is_end} { -} - -simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { - //if(stream->error) { return stream->error; } - return simdjson_result(stream->doc, stream->error); -} - -simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { - // If there is an error, then we want the iterator - // to be finished, no matter what. (E.g., we do not - // keep generating documents with errors, or go beyond - // a document with errors.) - // - // Users do not have to call "operator*()" when they use operator++, - // so we need to end the stream in the operator++ function. - // - // Note that setting finished = true is essential otherwise - // we would enter an infinite loop. - if (stream->error) { finished = true; } - // Note that stream->error() is guarded against error conditions - // (it will immediately return if stream->error casts to false). - // In effect, this next function does nothing when (stream->error) - // is true (hence the risk of an infinite loop). - stream->next(); - // If that was the last document, we're finished. - // It is the only type of error we do not want to appear - // in operator*. - if (stream->error == EMPTY) { finished = true; } - // If we had any other kind of error (not EMPTY) then we want - // to pass it along to the operator* and we cannot mark the result - // as "finished" just yet. - return *this; + return first.get(out); } - -simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { - return finished != other.finished; +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); } -simdjson_inline document_stream::iterator document_stream::begin() noexcept { - start(); - // If there are no documents, we're finished. - return iterator(this, error == EMPTY); +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); } - -simdjson_inline document_stream::iterator document_stream::end() noexcept { - return iterator(this, true); +template<> simdjson_inline error_code simdjson_result::get(icelake::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code simdjson_result::get(icelake::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; } -inline void document_stream::start() noexcept { - if (error) { return; } - error = parser->allocate(batch_size); - if (error) { return; } - // Always run the first stage 1 parse immediately - batch_start = 0; - error = run_stage1(*parser, batch_start); - while(error == EMPTY) { - // In exceptional cases, we may start with an empty block - batch_start = next_batch_start(); - if (batch_start >= len) { return; } - error = run_stage1(*parser, batch_start); - } - if (error) { return; } - doc_index = batch_start; - doc = document(json_iterator(&buf[batch_start], parser)); - doc.iter._streaming = true; - - #ifdef SIMDJSON_THREADS_ENABLED - if (use_thread && next_batch_start() < len) { - // Kick off the first thread on next batch if needed - error = stage1_thread_parser.allocate(batch_size); - if (error) { return; } - worker->start_thread(); - start_stage1_thread(); - if (error) { return; } - } - #endif // SIMDJSON_THREADS_ENABLED +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); } -inline void document_stream::next() noexcept { - // We always enter at once once in an error condition. - if (error) { return; } - next_document(); - if (error) { return; } - auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); - doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; - - // Check if at end of structural indexes (i.e. at end of batch) - if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { - error = EMPTY; - // Load another batch (if available) - while (error == EMPTY) { - batch_start = next_batch_start(); - if (batch_start >= len) { break; } - #ifdef SIMDJSON_THREADS_ENABLED - if(use_thread) { - load_from_stage1_thread(); - } else { - error = run_stage1(*parser, batch_start); - } - #else - error = run_stage1(*parser, batch_start); - #endif - /** - * Whenever we move to another window, we need to update all pointers to make - * it appear as if the input buffer started at the beginning of the window. - * - * Take this input: - * - * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] - * - * Say you process the following window... - * - * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' - * - * When you do so, the json_iterator has a pointer at the beginning of the memory region - * (pointing at the beginning of '{"z"...'. - * - * When you move to the window that starts at... - * - * '[7, 10, 9] [15, 11, 12, 13] ... - * - * then it is not sufficient to just run stage 1. You also need to re-anchor the - * json_iterator so that it believes we are starting at '[7, 10, 9]...'. - * - * Under the DOM front-end, this gets done automatically because the parser owns - * the pointer the data, and when you call stage1 and then stage2 on the same - * parser, then stage2 will run on the pointer acquired by stage1. - * - * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that - * we used. But json_iterator has no callback when stage1 is called on the parser. - * In fact, I think that the parser is unaware of json_iterator. - * - * - * So we need to re-anchor the json_iterator after each call to stage 1 so that - * all of the pointers are in sync. - */ - doc.iter = json_iterator(&buf[batch_start], parser); - doc.iter._streaming = true; - /** - * End of resync. - */ +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} - if (error) { continue; } // If the error was EMPTY, we may want to load another batch. - doc_index = batch_start; - } - } +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); } -inline void document_stream::next_document() noexcept { - // Go to next place where depth=0 (document depth) - error = doc.iter.skip_child(0); - if (error) { return; } - // Always set depth=1 at the start of document - doc.iter._depth = 1; - // consume comma if comma separated is allowed - if (allow_comma_separated) { doc.iter.consume_character(','); } - // Resets the string buffer at the beginning, thus invalidating the strings. - doc.iter._string_buf_loc = parser->string_buf.get(); - doc.iter._root = doc.iter.position(); +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); } -inline size_t document_stream::next_batch_start() const noexcept { - return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); } -inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { - // This code only updates the structural index in the parser, it does not update any json_iterator - // instance. - size_t remaining = len - _batch_start; - if (remaining <= batch_size) { - return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); - } else { - return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); - } +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); } -simdjson_inline size_t document_stream::iterator::current_index() const noexcept { - return stream->doc_index; +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); } -simdjson_inline std::string_view document_stream::iterator::source() const noexcept { - auto depth = stream->doc.iter.depth(); - auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); - // If at root, process the first token to determine if scalar value - if (stream->doc.iter.at_root()) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': // Depth=1 already at start of document - break; - case '}': case ']': - depth--; - break; - default: // Scalar value document - // TODO: Remove any trailing whitespaces - // This returns a string spanning from start of value to the beginning of the next document (excluded) - return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); - } - cur_struct_index++; - } +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif - while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': - depth++; - break; - case '}': case ']': - depth--; - break; - } - if (depth == 0) { break; } - cur_struct_index++; - } - return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); } -inline error_code document_stream::iterator::error() const noexcept { - return stream->error; +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); } -#ifdef SIMDJSON_THREADS_ENABLED - -inline void document_stream::load_from_stage1_thread() noexcept { - worker->finish(); - // Swap to the parser that was loaded up in the thread. Make sure the parser has - // enough memory to swap to, as well. - std::swap(stage1_thread_parser,*parser); - error = stage1_thread_error; - if (error) { return; } - // If there's anything left, start the stage 1 thread! - if (next_batch_start() < len) { - start_stage1_thread(); - } +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); } -inline void document_stream::start_stage1_thread() noexcept { - // we call the thread on a lambda that will update - // this->stage1_thread_error - // there is only one thread that may write to this value - // TODO this is NOT exception-safe. - this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error - size_t _next_batch_start = this->next_batch_start(); +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} - worker->run(this, & this->stage1_thread_parser, _next_batch_start); +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -#endif // SIMDJSON_THREADS_ENABLED +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} -} // namespace ondemand -} // namespace fallback } // namespace simdjson + namespace simdjson { +namespace icelake { +namespace ondemand { -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ -} -simdjson_inline simdjson_result::simdjson_result( - fallback::ondemand::document_stream &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. + * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +template +simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } -} +} // namespace ondemand +} // namespace icelake +} // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H -/* end file simdjson/generic/ondemand/document_stream-inl.h for fallback */ -/* including simdjson/generic/ondemand/field-inl.h for fallback: #include "simdjson/generic/ondemand/field-inl.h" */ -/* begin file simdjson/generic/ondemand/field-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { -namespace ondemand { +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} -// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit -simdjson_inline field::field() noexcept : std::pair() {} -simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept - : std::pair(key, std::forward(value)) -{ +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); } - -simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { - raw_json_string key; - SIMDJSON_TRY( parent_iter.field_key().get(key) ); - SIMDJSON_TRY( parent_iter.field_value() ); - return field::start(parent_iter, key); +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); } - -simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { - return field(key, parent_iter.child()); +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); } - -simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. - simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); - first.consume(); - return answer; +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; } - -simdjson_inline raw_json_string field::key() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - return first; +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); } - -simdjson_inline value &field::value() & noexcept { - return second; +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); } - -simdjson_inline value field::value() && noexcept { - return std::forward(*this).second; +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); } - -} // namespace ondemand -} // namespace fallback -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result( - fallback::ondemand::field &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); } -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); } - -simdjson_inline simdjson_result simdjson_result::key() noexcept { +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { if (error()) { return error(); } - return first.key(); + return first.is_string(); } -simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { +template <> +simdjson_inline error_code simdjson_result::get(icelake::ondemand::document_reference &out) & noexcept { if (error()) { return error(); } - return first.unescaped_key(allow_replacement); + out = first; + return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::value() noexcept { +template <> +simdjson_inline error_code simdjson_result::get(icelake::ondemand::document_reference &out) && noexcept { if (error()) { return error(); } - return std::move(first.value()); + out = first; + return SUCCESS; } - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H -/* end file simdjson/generic/ondemand/field-inl.h for fallback */ -/* including simdjson/generic/ondemand/json_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/json_iterator-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace fallback { -namespace ondemand { - -simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept - : token(std::forward(other.token)), - parser{other.parser}, - _string_buf_loc{other._string_buf_loc}, - error{other.error}, - _depth{other._depth}, - _root{other._root}, - _streaming{other._streaming} -{ - other.parser = nullptr; +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); } -simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { - token = other.token; - parser = other.parser; - _string_buf_loc = other._string_buf_loc; - error = other.error; - _depth = other._depth; - _root = other._root; - _streaming = other._streaming; - other.parser = nullptr; - return *this; +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); } - -simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept - : token(buf, &_parser->implementation->structural_indexes[0]), - parser{_parser}, - _string_buf_loc{parser->string_buf.get()}, - _depth{1}, - _root{parser->implementation->structural_indexes.get()}, - _streaming{false} - -{ - logger::log_headers(); -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); } - -inline void json_iterator::rewind() noexcept { - token.set_position( root_position() ); - logger::log_headers(); // We start again - _string_buf_loc = parser->string_buf.get(); - _depth = 1; +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); } - -inline bool json_iterator::balanced() const noexcept { - token_iterator ti(token); - int32_t count{0}; - ti.set_position( root_position() ); - while(ti.peek() <= peek_last()) { - switch (*ti.return_current_and_advance()) - { - case '[': case '{': - count++; - break; - case ']': case '}': - count--; - break; - default: - break; - } - } - return count == 0; +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + if (error()) { throw simdjson_error(error()); } + return first.get(); } - - -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and parent_depth, which is a desired effect. The warning does not show up if the -// skip_child() function is not marked inline). -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { - if (depth() <= parent_depth) { return SUCCESS; } - switch (*return_current_and_advance()) { - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - - // For the first open array/object in a value, we've already incremented depth, so keep it the same - // We never stop at colon, but if we did, it wouldn't affect depth - case '[': case '{': case ':': - logger::log_start_value(*this, "skip"); - break; - // If there is a comma, we have just finished a value in an array/object, and need to get back in - case ',': - logger::log_value(*this, "skip"); - break; - // ] or } means we just finished a value and need to jump out of the array/object - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } -#if SIMDJSON_CHECK_EOF - // If there are no more tokens, the parent is incomplete. - if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } -#endif // SIMDJSON_CHECK_EOF - break; - case '"': - if(*peek() == ':') { - // We are at a key!!! - // This might happen if you just started an object and you skip it immediately. - // Performance note: it would be nice to get rid of this check as it is somewhat - // expensive. - // https://github.com/simdjson/simdjson/issues/1742 - logger::log_value(*this, "key"); - return_current_and_advance(); // eat up the ':' - break; // important!!! - } - simdjson_fallthrough; - // Anything else must be a scalar value - default: - // For the first scalar, we will have incremented depth already, so we decrement it here. - logger::log_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - } - - // Now that we've considered the first value, we only increment/decrement for arrays/objects - while (position() < end_position()) { - switch (*return_current_and_advance()) { - case '[': case '{': - logger::log_start_value(*this, "skip"); - _depth++; - break; - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - default: - logger::log_value(*this, "skip", ""); - break; - } - } - - return report_error(TAPE_ERROR, "not enough close braces"); +simdjson_inline simdjson_result::operator icelake::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } - -SIMDJSON_POP_DISABLE_WARNINGS - -simdjson_inline bool json_iterator::at_root() const noexcept { - return position() == root_position(); +simdjson_inline simdjson_result::operator icelake::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } - -simdjson_inline bool json_iterator::is_single_token() const noexcept { - return parser->implementation->n_structural_indexes == 1; +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } - -simdjson_inline bool json_iterator::streaming() const noexcept { - return _streaming; +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } - -simdjson_inline token_position json_iterator::root_position() const noexcept { - return _root; +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } - -simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } - -simdjson_inline void json_iterator::assert_at_root() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument - // has side effects that will be discarded. - SIMDJSON_ASSUME( token.position() == _root ); -#endif +simdjson_inline simdjson_result::operator icelake::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } - -simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { - assert_valid_position(token._position + required_tokens - 1); +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } - -simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); - SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); -#endif +simdjson_inline simdjson_result::operator icelake::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } +#endif -simdjson_inline bool json_iterator::at_end() const noexcept { - return position() == end_position(); +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); } -simdjson_inline token_position json_iterator::end_position() const noexcept { - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - return &parser->implementation->structural_indexes[n_structural_indexes]; + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); } -inline std::string json_iterator::to_string() const noexcept { - if( !is_alive() ) { return "dead json_iterator instance"; } - const char * current_structural = reinterpret_cast(token.peek()); - return std::string("json_iterator [ depth : ") + std::to_string(_depth) - + std::string(", structural : '") + std::string(current_structural,1) - + std::string("', offset : ") + std::to_string(token.current_offset()) - + std::string("', error : ") + error_message(error) - + std::string(" ]"); +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -inline simdjson_result json_iterator::current_location() const noexcept { - if (!is_alive()) { // Unrecoverable error - if (!at_root()) { - return reinterpret_cast(token.peek(-1)); - } else { - return reinterpret_cast(token.peek()); - } +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); } - if (at_end()) { - return OUT_OF_BOUNDS; - } - return reinterpret_cast(token.peek()); + return first.at_path(json_path); } -simdjson_inline bool json_iterator::is_alive() const noexcept { - return parser; -} +} // namespace simdjson -simdjson_inline void json_iterator::abandon() noexcept { - parser = nullptr; - _depth = 0; -} +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for icelake */ +/* including simdjson/generic/ondemand/document_stream-inl.h for icelake: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H -simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif // SIMDJSON_CHECK_EOF - return token.return_current_and_advance(); -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { - // deliberately done without safety guard: - return token.peek(); -} +#include +#include -simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // SIMDJSON_CHECK_EOF - return token.peek(delta); -} +namespace simdjson { +namespace icelake { +namespace ondemand { -simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // #if SIMDJSON_CHECK_EOF - return token.peek_length(delta); -} +#ifdef SIMDJSON_THREADS_ENABLED -simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { - // todo: currently we require end-of-string buffering, but the following - // assert_valid_position should be turned on if/when we lift that condition. - // assert_valid_position(position); - // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF - // is ON by default, we have no choice but to disable it for real with a comment. - return token.peek(position); +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); } -simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_valid_position(position); -#endif // SIMDJSON_CHECK_EOF - return token.peek_length(position); +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); } -simdjson_inline token_position json_iterator::last_position() const noexcept { - // The following line fails under some compilers... - // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); - // since it has side-effects. - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - SIMDJSON_ASSUME(n_structural_indexes > 0); - return &parser->implementation->structural_indexes[n_structural_indexes - 1]; -} -simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { - return token.peek(last_position()); +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); } -simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { - SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); - SIMDJSON_ASSUME(_depth == parent_depth + 1); - _depth = parent_depth; + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } } -simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); - _depth = child_depth; +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); } -simdjson_inline depth_t json_iterator::depth() const noexcept { - return _depth; +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif } -simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { - return _string_buf_loc; +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ } -simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); - logger::log_error(*this, message); - error = _error; - return error; +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif } -simdjson_inline token_position json_iterator::position() const noexcept { - return token.position(); +inline size_t document_stream::size_in_bytes() const noexcept { + return len; } -simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { - return parser->unescape(in, _string_buf_loc, allow_replacement); +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; } -simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { - return parser->unescape_wobbly(in, _string_buf_loc); +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { } -simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); -#if SIMDJSON_DEVELOPMENT_CHECKS -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); - SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); -#endif -#endif - token.set_position(position); - _depth = child_depth; +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { } -simdjson_inline error_code json_iterator::consume_character(char c) noexcept { - if (*peek() == c) { - return_current_and_advance(); - return SUCCESS; - } - return TAPE_ERROR; +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + return simdjson_result(stream->doc, stream->error); } -#if SIMDJSON_DEVELOPMENT_CHECKS - -simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { - SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); - return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; } -simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { - SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); - if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; } -#endif - +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. + return iterator(this, error == EMPTY); +} -simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); - logger::log_error(*this, message); - return _error; +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); } +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; -simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { - // This function is not expected to be called in performance-sensitive settings. - // Let us guard against silly cases: - if((N < max_len) || (N == 0)) { return false; } - // Copy to the buffer. - std::memcpy(tmpbuf, json, max_len); - if(N > max_len) { // We pad whatever remains with ' '. - std::memset(tmpbuf + max_len, ' ', N - max_len); + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } } - return true; + #endif // SIMDJSON_THREADS_ENABLED } -} // namespace ondemand -} // namespace fallback -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::json_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/json_iterator-inl.h for fallback */ -/* including simdjson/generic/ondemand/json_type-inl.h for fallback: #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* begin file simdjson/generic/ondemand/json_type-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; -namespace simdjson { -namespace fallback { -namespace ondemand { + // Check if at end of structural indexes (i.e. at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. + */ -inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { - switch (type) { - case json_type::array: out << "array"; break; - case json_type::object: out << "object"; break; - case json_type::number: out << "number"; break; - case json_type::string: out << "string"; break; - case json_type::boolean: out << "boolean"; break; - case json_type::null: out << "null"; break; - default: SIMDJSON_UNREACHABLE(); + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; } - return out; -} - -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { - return out << type.value(); + } } -#endif - - -simdjson_inline number_type number::get_number_type() const noexcept { - return type; +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { doc.iter.consume_character(','); } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); } -simdjson_inline bool number::is_uint64() const noexcept { - return get_number_type() == number_type::unsigned_integer; +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } -simdjson_inline uint64_t number::get_uint64() const noexcept { - return payload.unsigned_integer; +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } } -simdjson_inline number::operator uint64_t() const noexcept { - return get_uint64(); +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; } +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); -simdjson_inline bool number::is_int64() const noexcept { - return get_number_type() == number_type::signed_integer; -} + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: We could remove trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } + } + cur_struct_index++; + } -simdjson_inline int64_t number::get_int64() const noexcept { - return payload.signed_integer; -} + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } -simdjson_inline number::operator int64_t() const noexcept { - return get_int64(); + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; } -simdjson_inline bool number::is_double() const noexcept { - return get_number_type() == number_type::floating_point_number; +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; } -simdjson_inline double number::get_double() const noexcept { - return payload.floating_point_number; -} +#ifdef SIMDJSON_THREADS_ENABLED -simdjson_inline number::operator double() const noexcept { - return get_double(); -} +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } -simdjson_inline double number::as_double() const noexcept { - if(is_double()) { - return payload.floating_point_number; - } - if(is_int64()) { - return double(payload.signed_integer); + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); } - return double(payload.unsigned_integer); } -simdjson_inline void number::append_s64(int64_t value) noexcept { - payload.signed_integer = value; - type = number_type::signed_integer; -} - -simdjson_inline void number::append_u64(uint64_t value) noexcept { - payload.unsigned_integer = value; - type = number_type::unsigned_integer; -} +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. + this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); -simdjson_inline void number::append_double(double value) noexcept { - payload.floating_point_number = value; - type = number_type::floating_point_number; + worker->run(this, & this->stage1_thread_parser, _next_batch_start); } -simdjson_inline void number::skip_double() noexcept { - type = number_type::floating_point_number; -} +#endif // SIMDJSON_THREADS_ENABLED } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::json_type &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} -} // namespace simdjson +} -#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H -/* end file simdjson/generic/ondemand/json_type-inl.h for fallback */ -/* including simdjson/generic/ondemand/logger-inl.h for fallback: #include "simdjson/generic/ondemand/logger-inl.h" */ -/* begin file simdjson/generic/ondemand/logger-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for icelake */ +/* including simdjson/generic/ondemand/field-inl.h for icelake: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include -#include - namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { -namespace logger { - -static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; -static constexpr const int LOG_EVENT_LEN = 20; -static constexpr const int LOG_BUFFER_LEN = 30; -static constexpr const int LOG_SMALL_BUFFER_LEN = 10; -static int log_depth = 0; // Not threadsafe. Log only. -// Helper to turn unprintable or newline characters into spaces -static inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } -} +// clang 6 does not think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} -template -static inline std::string string_format(const std::string& format, const Args&... args) +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) { - SIMDJSON_PUSH_DISABLE_ALL_WARNINGS - int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; - auto size = static_cast(size_s); - if (size <= 0) return std::string(); - std::unique_ptr buf(new char[size]); - std::snprintf(buf.get(), size, format.c_str(), args...); - SIMDJSON_POP_DISABLE_WARNINGS - return std::string(buf.get(), buf.get() + size - 1); } -static inline log_level get_log_level_from_env() -{ - SIMDJSON_PUSH_DISABLE_WARNINGS - SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe - char *lvl = getenv("SIMDJSON_LOG_LEVEL"); - SIMDJSON_POP_DISABLE_WARNINGS - if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } - return log_level::info; +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); } -static inline log_level log_threshold() -{ - static log_level threshold = get_log_level_from_env(); - return threshold; +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); } -static inline bool should_log(log_level level) -{ - return level >= log_threshold(); +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; } -inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; } -inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "", type, detail, log_level::info); -} -inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return first; } -inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "+", type, detail, log_level::info); - if (LOG_ENABLED) { log_depth++; } -} -inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_line(iter, "+", type, "", delta, depth_delta, log_level::info); - if (LOG_ENABLED) { log_depth++; } -} -inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - if (LOG_ENABLED) { log_depth--; } - log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); } -inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); -} -inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { - log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); } -inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_event(iter.json_iter(), type, detail, delta, depth_delta); +simdjson_inline value &field::value() & noexcept { + return second; } -inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_value(iter.json_iter(), type, detail, delta, depth_delta); +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; } -inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_start_value(iter.json_iter(), type, delta, depth_delta); +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ } -inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_end_value(iter.json_iter(), type, delta, depth_delta); +simdjson_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); } -inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_error(iter.json_iter(), error, detail, delta, depth_delta); +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { + if (error()) { return error(); } + return first.key_raw_json_token(); } -inline void log_headers() noexcept { - if (LOG_ENABLED) { - if (simdjson_unlikely(should_log(log_level::info))) { - // Technically a static variable is not thread-safe, but if you are using threads and logging... well... - static bool displayed_hint{false}; - log_depth = 0; - printf("\n"); - if (!displayed_hint) { - // We only print this helpful header once. - printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); - printf("# +array says 'this is where we were when we discovered the start array'\n"); - printf( - "# -array says 'this is where we were when we ended the array'\n"); - printf("# skip says 'this is a structural or value I am skipping'\n"); - printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); - printf("#\n"); - printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); - printf("# in addition to the depth being displayed.\n"); - printf("#\n"); - printf("# Every token in the document has a single depth determined by the tokens before it,\n"); - printf("# and is not affected by what the token actually is.\n"); - printf("#\n"); - printf("# Not all structural elements are presented as tokens in the logs.\n"); - printf("#\n"); - printf("# We never give control to the user within an empty array or an empty object.\n"); - printf("#\n"); - printf("# Inside an array, having a depth greater than the array's depth means that\n"); - printf("# we are pointing inside a value.\n"); - printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); - printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); - displayed_hint = true; - } - printf("\n"); - printf("| %-*s ", LOG_EVENT_LEN, "Event"); - printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); - printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); - // printf("| %-*s ", 5, "Next#"); - printf("| %-*s ", 5, "Depth"); - printf("| Detail "); - printf("|\n"); +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { + if (error()) { return error(); } + return first.escaped_key(); +} - printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); - printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); - printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); - // printf("|%.*s", 5+2, DASHES); - printf("|%.*s", 5 + 2, DASHES); - printf("|--------"); - printf("|\n"); - fflush(stdout); - } - } +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(allow_replacement); } -template -inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { - log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); +template +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); } -template -inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { - if (LOG_ENABLED) { - if (simdjson_unlikely(should_log(level))) { - const int indent = depth * 2; - const auto buf = iter.token.buf; - auto msg = string_format(title, std::forward(args)...); - printf("| %*s%s%-*s ", indent, "", title_prefix, - LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); - { - // Print the current structural. - printf("| "); - // Before we begin, the index might point right before the document. - // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 - if (index < iter._root) { - printf("%*s", LOG_BUFFER_LEN, ""); - } else { - auto current_structural = &buf[*index]; - for (int i = 0; i < LOG_BUFFER_LEN; i++) { - printf("%c", printable_char(current_structural[i])); - } - } - printf(" "); - } - { - // Print the next structural. - printf("| "); - auto next_structural = &buf[*(index + 1)]; - for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { - printf("%c", printable_char(next_structural[i])); - } - printf(" "); - } - // printf("| %5u ", *(index+1)); - printf("| %5i ", depth); - printf("| %6.*s ", int(detail.size()), detail.data()); - printf("|\n"); - fflush(stdout); - } - } +simdjson_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); } -} // namespace logger -} // namespace ondemand -} // namespace fallback } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H -/* end file simdjson/generic/ondemand/logger-inl.h for fallback */ -/* including simdjson/generic/ondemand/object-inl.h for fallback: #include "simdjson/generic/ondemand/object-inl.h" */ -/* begin file simdjson/generic/ondemand/object-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for icelake */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { -simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; } -simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; } -simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { - return find_field_unordered(key); + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif } -simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { - return std::forward(*this).find_field_unordered(key); + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif } -simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; +} + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } } - return value(iter.child()); + return count == 0; } -simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! + } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. + logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; } - return value(iter.child()); + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); } -simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_object().error() ); - return object(iter); +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); } -simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_root_object().error() ); - return object(iter); + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; } -simdjson_inline error_code object::consume() noexcept { - if(iter.is_at_key()) { - /** - * whenever you are pointing at a key, calling skip_child() is - * unsafe because you will hit a string and you will assume that - * it is string value, and this mistake will lead you to make bad - * depth computation. - */ - /** - * We want to 'consume' the key. We could really - * just do _json_iter->return_current_and_advance(); at this - * point, but, for clarity, we will use the high-level API to - * eat the key. We assume that the compiler optimizes away - * most of the work. - */ - simdjson_unused raw_json_string actual_key; - auto error = iter.field_key().get(actual_key); - if (error) { iter.abandon(); return error; }; - // Let us move to the value while we are at it. - if ((error = iter.field_value())) { iter.abandon(); return error; } - } - auto error_skip = iter.json_iter().skip_child(iter.depth()-1); - if(error_skip) { iter.abandon(); } - return error_skip; + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; } -simdjson_inline simdjson_result object::raw_json() noexcept { - const uint8_t * starting_point{iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - const uint8_t * final_point{iter._json_iter->peek()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; } -simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.started_object().error() ); - return object(iter); +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); } -simdjson_inline object object::resume(const value_iterator &iter) noexcept { - return iter; +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. + SIMDJSON_ASSUME( token.position() == _root ); +#endif } -simdjson_inline object::object(const value_iterator &_iter) noexcept - : iter{_iter} -{ +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); } -simdjson_inline simdjson_result object::begin() noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); #endif - return object_iterator(iter); } -simdjson_inline simdjson_result object::end() noexcept { - return object_iterator(iter); + +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; } -inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { - if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } - json_pointer = json_pointer.substr(1); - size_t slash = json_pointer.find('/'); - std::string_view key = json_pointer.substr(0, slash); - // Grab the child with the given key - simdjson_result child; +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); +} - // If there is an escape character in the key, unescape it and then get the child. - size_t escape = key.find('~'); - if (escape != std::string_view::npos) { - // Unescape the key - std::string unescaped(key); - do { - switch (unescaped[escape+1]) { - case '0': - unescaped.replace(escape, 2, "~"); - break; - case '1': - unescaped.replace(escape, 2, "/"); - break; - default: - return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); - } - escape = unescaped.find('~', escape+1); - } while (escape != std::string::npos); - child = find_field(unescaped); // Take note find_field does not unescape keys when matching - } else { - child = find_field(key); - } - if(child.error()) { - return child; // we do not continue if there was an error +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } } - // If there is a /, we have to recurse and look up more of the path - if (slash != std::string_view::npos) { - child = child.at_pointer(json_pointer.substr(slash)); + if (at_end()) { + return OUT_OF_BOUNDS; } - return child; + return reinterpret_cast(token.peek()); } -simdjson_inline simdjson_result object::count_fields() & noexcept { - size_t count{0}; - // Important: we do not consume any of the values. - for(simdjson_unused auto v : *this) { count++; } - // The above loop will always succeed, but we want to report errors. - if(iter.error()) { return iter.error(); } - // We need to move back at the start because we expect users to iterate through - // the object after counting the number of elements. - iter.reset_object(); - return count; +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; } -simdjson_inline simdjson_result object::is_empty() & noexcept { - bool is_not_empty; - auto error = iter.reset_object().get(is_not_empty); - if(error) { return error; } - return !is_not_empty; +simdjson_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; } -simdjson_inline simdjson_result object::reset() & noexcept { - return iter.reset_object(); +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); } -} // namespace ondemand -} // namespace fallback -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::object &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); +} -simdjson_inline simdjson_result simdjson_result::begin() noexcept { - if (error()) { return error(); } - return first.begin(); +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); } -simdjson_inline simdjson_result simdjson_result::end() noexcept { - if (error()) { return error(); } - return first.end(); + +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); + +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field_unordered(key); + +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first)[key]; + +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field(key); + +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; } -inline simdjson_result simdjson_result::reset() noexcept { - if (error()) { return error(); } - return first.reset(); +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; } -inline simdjson_result simdjson_result::is_empty() noexcept { - if (error()) { return error(); } - return first.is_empty(); +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); +simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; } -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { - if (error()) { return error(); } - return first.raw_json(); +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); } -} // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H -/* end file simdjson/generic/ondemand/object-inl.h for fallback */ -/* including simdjson/generic/ondemand/object_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/object_iterator-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape_wobbly(in, _string_buf_loc); +#endif +} -namespace simdjson { -namespace fallback { -namespace ondemand { +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; +} -// -// object_iterator -// +simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; +} -simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept - : iter{_iter} -{} +#if SIMDJSON_DEVELOPMENT_CHECKS -simdjson_inline simdjson_result object_iterator::operator*() noexcept { - error_code error = iter.error(); - if (error) { iter.abandon(); return error; } - auto result = field::start(iter); - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (result.error()) { iter.abandon(); } - return result; -} -simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { - return !(*this != other); +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; } -simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { - return iter.is_open(); + +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline object_iterator &object_iterator::operator++() noexcept { - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error +#endif - simdjson_unused error_code error; - if ((error = iter.skip_child() )) { return *this; } - simdjson_unused bool has_value; - if ((error = iter.has_next_field().get(has_value) )) { return *this; }; - return *this; +simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; } -SIMDJSON_POP_DISABLE_WARNINGS -// -// ### Live States -// -// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is -// always SUCCESS: -// -// - Start: This is the state when the object is first found and the iterator is just past the {. -// In this state, at_start == true. -// - Next: After we hand a scalar value to the user, or an array/object which they then fully -// iterate over, the iterator is at the , or } before the next value. In this state, -// depth == iter.depth, at_start == false, and error == SUCCESS. -// - Unfinished Business: When we hand an array/object to the user which they do not fully -// iterate over, we need to finish that iteration by skipping child values until we reach the -// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. -// -// ## Error States -// -// In error states, we will yield exactly one more value before stopping. iter.depth == depth -// and at_start is always false. We decrement after yielding the error, moving to the Finished -// state. -// -// - Chained Error: When the object iterator is part of an error chain--for example, in -// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an -// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and -// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. -// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, -// we flag that as an error and treat it exactly the same as a Chained Error. In this state, -// error == TAPE_ERROR, iter.depth == depth, and at_start == false. -// -// Errors that occur while reading a field to give to the user (such as when the key is not a -// string or the field is missing a colon) are yielded immediately. Depth is then decremented, -// moving to the Finished state without transitioning through an Error state at all. -// -// ## Terminal State -// -// The terminal state has iter.depth < depth. at_start is always false. -// -// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. -// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. -// + +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. + std::memset(tmpbuf + max_len, ' ', N - max_len); + } + return true; +} } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - fallback::ondemand::object_iterator &&value -) noexcept - : implementation_simdjson_result_base(std::forward(value)) -{ - first.iter.assert_is_valid(); -} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base({}, error) -{ -} - -simdjson_inline simdjson_result simdjson_result::operator*() noexcept { - if (error()) { return error(); } - return *first; -} -// If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return !error(); } - return first == other.first; -} -// If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return error(); } - return first != other.first; -} -// Checks for ']' and ',' -simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { - // Clear the error if there is one, so we don't yield it twice - if (error()) { second = SUCCESS; return *this; } - ++first; - return *this; -} +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/object_iterator-inl.h for fallback */ -/* including simdjson/generic/ondemand/parser-inl.h for fallback: #include "simdjson/generic/ondemand/parser-inl.h" */ -/* begin file simdjson/generic/ondemand/parser-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for icelake */ +/* including simdjson/generic/ondemand/json_type-inl.h for icelake: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { -simdjson_inline parser::parser(size_t max_capacity) noexcept - : _max_capacity{max_capacity} { +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; } -simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { - if (new_capacity > max_capacity()) { return CAPACITY; } - if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } - - // string_capacity copied from document::allocate - _capacity = 0; - size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); - string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); -#if SIMDJSON_DEVELOPMENT_CHECKS - start_positions.reset(new (std::nothrow) token_position[new_max_depth]); -#endif - if (implementation) { - SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); - SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); - } else { - SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); - } - _capacity = new_capacity; - _max_depth = new_max_depth; - return SUCCESS; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); } +#endif -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } - - json.remove_utf8_bom(); - // Allocate if needed - if (capacity() < json.length() || !string_buf) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); - } - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return document::start({ reinterpret_cast(json.data()), this }); +simdjson_inline number_type number::get_number_type() const noexcept { + return type; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { - return iterate(padded_string_view(json, allocated)); +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { - if(json.capacity() - json.size() < SIMDJSON_PADDING) { - json.reserve(json.size() + SIMDJSON_PADDING); - } - return iterate(padded_string_view(json)); +simdjson_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { - return iterate(padded_string_view(json)); +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - padded_string_view json = result.value_unsafe(); - return iterate(json); +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - const padded_string &json = result.value_unsafe(); - return iterate(json); +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } - - json.remove_utf8_bom(); - - // Allocate if needed - if (capacity() < json.length()) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); - } +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return json_iterator(reinterpret_cast(json.data()), this); +simdjson_inline number::operator double() const noexcept { + return get_double(); } -inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { - if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } - if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { - buf += 3; - len -= 3; +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; } - if(allow_comma_separated && batch_size < len) { batch_size = len; } - return document_stream(*this, buf, len, batch_size, allow_comma_separated); -} -inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); -} -inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); -} -inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); } -simdjson_inline size_t parser::capacity() const noexcept { - return _capacity; -} -simdjson_inline size_t parser::max_capacity() const noexcept { - return _max_capacity; -} -simdjson_inline size_t parser::max_depth() const noexcept { - return _max_depth; +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; } -simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { - if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { - _max_capacity = max_capacity; - } else { - _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; - } +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; } -simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { - uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; - return result; +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; } -simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { - uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; - return result; +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; } } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::parser &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H -/* end file simdjson/generic/ondemand/parser-inl.h for fallback */ -/* including simdjson/generic/ondemand/raw_json_string-inl.h for fallback: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ -/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for icelake */ +/* including simdjson/generic/ondemand/logger-inl.h for icelake: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -namespace simdjson { +#include +#include -namespace fallback { +namespace simdjson { +namespace icelake { namespace ondemand { +namespace logger { -simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. -simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } +} +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); +} -simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { - size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;pos < target.size() && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;pos < target.size();pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } - } - return true; +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; } -simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { - size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;target[pos] && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;target[pos];pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } - } - return true; +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; } +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); +} -simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { - // If we are going to call memcmp, then we must know something about the length of the raw_json_string. - return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } -simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and - // the raw content is quote terminated within a valid JSON string. - if(target.size() <= SIMDJSON_PADDING) { - return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); - } - const char * r{raw()}; - size_t pos{0}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } - } - if(r[pos] != '"') { return false; } - return true; +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); +} +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } -simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } - } - if(r[pos] != '"') { return false; } - return true; +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } } +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} -simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { - // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - } - if(r[pos] != '"') { return false; } - return true; +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); } -simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } - } - if(r[pos] != '"') { return false; } - return true; +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); } -simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { - return a.unsafe_is_equal(c); +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); } -simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { - return a == c; +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { - return !(a == c); +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { - return !(a == c); +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); } +inline void log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); -simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { - return iter.unescape(*this, allow_replacement); + printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } } -simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { - return iter.unescape_wobbly(*this); +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); } -simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { - bool in_escape = false; - const char *s = str.raw(); - while (true) { - switch (*s) { - case '\\': in_escape = !in_escape; break; - case '"': if (in_escape) { in_escape = false; } else { return out; } break; - default: if (in_escape) { in_escape = false; } +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. + printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); } - out << *s; - s++; } } +} // namespace logger } // namespace ondemand -} // namespace fallback -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::raw_json_string &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -simdjson_inline simdjson_result simdjson_result::raw() const noexcept { - if (error()) { return error(); } - return first.raw(); -} -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(fallback::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { - if (error()) { return error(); } - return first.unescape(iter, allow_replacement); -} -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(fallback::ondemand::json_iterator &iter) const noexcept { - if (error()) { return error(); } - return first.unescape_wobbly(iter); -} +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H -/* end file simdjson/generic/ondemand/raw_json_string-inl.h for fallback */ -/* including simdjson/generic/ondemand/serialization-inl.h for fallback: #include "simdjson/generic/ondemand/serialization-inl.h" */ -/* begin file simdjson/generic/ondemand/serialization-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for icelake */ +/* including simdjson/generic/ondemand/object-inl.h for icelake: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { +namespace icelake { +namespace ondemand { -inline std::string_view trim(const std::string_view str) noexcept { - // We can almost surely do better by rolling our own find_first_not_of function. - size_t first = str.find_first_not_of(" \t\n\r"); - // If we have the empty string (just white space), then no trimming is possible, and - // we return the empty string_view. - if (std::string_view::npos == first) { return std::string_view(); } - size_t last = str.find_last_not_of(" \t\n\r"); - return str.substr(first, (last - first + 1)); +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} + +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); +} +simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; } +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} -inline simdjson_result to_json_string(fallback::ondemand::document& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); } -inline simdjson_result to_json_string(fallback::ondemand::document_reference& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; } -inline simdjson_result to_json_string(fallback::ondemand::value& x) noexcept { - /** - * If we somehow receive a value that has already been consumed, - * then the following code could be in trouble. E.g., we create - * an array as needed, but if an array was already created, then - * it could be bad. - */ - using namespace fallback::ondemand; - fallback::ondemand::json_type t; - auto error = x.type().get(t); - if(error != SUCCESS) { return error; } - switch (t) - { - case json_type::array: - { - fallback::ondemand::array array; - error = x.get_array().get(array); - if(error) { return error; } - return to_json_string(array); - } - case json_type::object: - { - fallback::ondemand::object object; - error = x.get_object().get(object); - if(error) { return error; } - return to_json_string(object); - } - default: - return trim(x.raw_json_token()); - } +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ } -inline simdjson_result to_json_string(fallback::ondemand::object& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); } -inline simdjson_result to_json_string(fallback::ondemand::array& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); -} +} // namespace ondemand +} // namespace icelake } // namespace simdjson -namespace simdjson { namespace fallback { namespace ondemand { +namespace simdjson { -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); } -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); } -#endif - -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); } -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; } -#endif - -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; } -inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document_reference& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); } + return first.at_path(json_path); } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); + +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); } -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } + +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); } -#endif -}}} // namespace simdjson::fallback::ondemand -#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H -/* end file simdjson/generic/ondemand/serialization-inl.h for fallback */ -/* including simdjson/generic/ondemand/token_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/token_iterator-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for icelake */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { -simdjson_inline token_iterator::token_iterator( - const uint8_t *_buf, - token_position position -) noexcept : buf{_buf}, _position{position} -{ -} - -simdjson_inline uint32_t token_iterator::current_offset() const noexcept { - return *(_position); -} - +// +// object_iterator +// -simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { - return &buf[*(_position++)]; -} +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} -simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { - return &buf[*position]; +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; } -simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { - return *position; +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); } -simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { - return *(position+1) - *position; +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); } -simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { - return &buf[*(_position+delta)]; -} -simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { - return *(_position+delta); -} -simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { - return *(_position+delta+1) - *(_position+delta); -} +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error -simdjson_inline token_position token_iterator::position() const noexcept { - return _position; -} -simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { - _position = target_position; -} + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } -simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { - return _position == other._position; -} -simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { - return _position != other._position; -} -simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { - return _position > other._position; -} -simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { - return _position >= other._position; -} -simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { - return _position < other._position; -} -simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { - return _position <= other._position; + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; } +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::token_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/token_iterator-inl.h for fallback */ -/* including simdjson/generic/ondemand/value-inl.h for fallback: #include "simdjson/generic/ondemand/value-inl.h" */ -/* begin file simdjson/generic/ondemand/value-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for icelake */ +/* including simdjson/generic/ondemand/parser-inl.h for icelake: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { +namespace icelake { namespace ondemand { -simdjson_inline value::value(const value_iterator &_iter) noexcept - : iter{_iter} -{ -} -simdjson_inline value value::start(const value_iterator &iter) noexcept { - return iter; -} -simdjson_inline value value::resume(const value_iterator &iter) noexcept { - return iter; +simdjson_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { } -simdjson_inline simdjson_result value::get_array() noexcept { - return array::start(iter); -} -simdjson_inline simdjson_result value::get_object() noexcept { - return object::start(iter); -} -simdjson_inline simdjson_result value::start_or_resume_object() noexcept { - if (iter.at_start()) { - return get_object(); +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); } else { - return object::resume(iter); + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; } - -simdjson_inline simdjson_result value::get_raw_json_string() noexcept { - return iter.get_raw_json_string(); -} -simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { - return iter.get_string(allow_replacement); -} -template -simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { - return iter.get_string(receiver, allow_replacement); -} -simdjson_inline simdjson_result value::get_wobbly_string() noexcept { - return iter.get_wobbly_string(); -} -simdjson_inline simdjson_result value::get_double() noexcept { - return iter.get_double(); -} -simdjson_inline simdjson_result value::get_double_in_string() noexcept { - return iter.get_double_in_string(); +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); } -simdjson_inline simdjson_result value::get_uint64() noexcept { - return iter.get_uint64(); +#endif + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); } -simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { - return iter.get_uint64_in_string(); + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); } -simdjson_inline simdjson_result value::get_int64() noexcept { - return iter.get_int64(); +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -simdjson_inline simdjson_result value::get_int64_in_string() noexcept { - return iter.get_int64_in_string(); + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -simdjson_inline simdjson_result value::get_bool() noexcept { - return iter.get_bool(); + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); } -simdjson_inline simdjson_result value::is_null() noexcept { - return iter.is_null(); + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { + if(json.capacity() - json.size() < SIMDJSON_PADDING) { + json.reserve(json.size() + SIMDJSON_PADDING); + } + return iterate(padded_string_view(json)); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } -template simdjson_inline error_code value::get(T &out) noexcept { - return get().get(out); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); } -#if SIMDJSON_EXCEPTIONS -simdjson_inline value::operator array() noexcept(false) { - return get_array(); -} -simdjson_inline value::operator object() noexcept(false) { - return get_object(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); } -simdjson_inline value::operator uint64_t() noexcept(false) { - return get_uint64(); + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); } -simdjson_inline value::operator int64_t() noexcept(false) { - return get_int64(); + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); } -simdjson_inline value::operator double() noexcept(false) { - return get_double(); + +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; + } + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); } -simdjson_inline value::operator std::string_view() noexcept(false) { - return get_string(false); +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); } -simdjson_inline value::operator raw_json_string() noexcept(false) { - return get_raw_json_string(); +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } -simdjson_inline value::operator bool() noexcept(false) { - return get_bool(); +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } -#endif -simdjson_inline simdjson_result value::begin() & noexcept { - return get_array().begin(); -} -simdjson_inline simdjson_result value::end() & noexcept { - return {}; -} -simdjson_inline simdjson_result value::count_elements() & noexcept { - simdjson_result answer; - auto a = get_array(); - answer = a.count_elements(); - // count_elements leaves you pointing inside the array, at the first element. - // We need to move back so that the user can create a new array (which requires that - // we point at '['). - iter.move_at_start(); - return answer; +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; } -simdjson_inline simdjson_result value::count_fields() & noexcept { - simdjson_result answer; - auto a = get_object(); - answer = a.count_fields(); - iter.move_at_start(); - return answer; +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; } -simdjson_inline simdjson_result value::at(size_t index) noexcept { - auto a = get_array(); - return a.at(index); +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; } -simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { - return start_or_resume_object().find_field(key); +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; + } } -simdjson_inline simdjson_result value::find_field(const char *key) noexcept { - return start_or_resume_object().find_field(key); + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { - return start_or_resume_object().find_field_unordered(key); +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { - return start_or_resume_object().find_field_unordered(key); + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for icelake */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for icelake: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +namespace icelake { +namespace ondemand { + +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} + +simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } + + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; } -simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { - return start_or_resume_object()[key]; +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; } -simdjson_inline simdjson_result value::operator[](const char *key) noexcept { - return start_or_resume_object()[key]; + + +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } -simdjson_inline simdjson_result value::type() noexcept { - return iter.type(); +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters("), and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; } -simdjson_inline simdjson_result value::is_scalar() noexcept { - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return ! ((this_type == json_type::array) || (this_type == json_type::object)); +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; } -simdjson_inline bool value::is_negative() noexcept { - return iter.is_negative(); + +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; } -simdjson_inline simdjson_result value::is_integer() noexcept { - return iter.is_integer(); +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; } -simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { - return iter.get_number_type(); + +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); } -simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { - return iter.get_number(); + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; } -simdjson_inline std::string_view value::raw_json_token() noexcept { - return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); } -simdjson_inline simdjson_result value::raw_json() noexcept { - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: { - ondemand::array array; - SIMDJSON_TRY(get_array().get(array)); - return array.raw_json(); - } - case json_type::object: { - ondemand::object object; - SIMDJSON_TRY(get_object().get(object)); - return object.raw_json(); - } - default: - return raw_json_token(); - } +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); } -simdjson_inline simdjson_result value::current_location() noexcept { - return iter.json_iter().current_location(); + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); } -simdjson_inline int32_t value::current_depth() const noexcept{ - return iter.json_iter().depth(); +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); } -simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: - return (*this).get_array().at_pointer(json_pointer); - case json_type::object: - return (*this).get_object().at_pointer(json_pointer); - default: - return INVALID_JSON_POINTER; +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; } } } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - fallback::ondemand::value &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ -} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { if (error()) { return error(); } - return first.count_elements(); + return first.raw(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(icelake::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { if (error()) { return error(); } - return first.count_fields(); + return first.unescape(iter, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(icelake::ondemand::json_iterator &iter) const noexcept { if (error()) { return error(); } - return first.at(index); + return first.unescape_wobbly(iter); } -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for icelake */ +/* including simdjson/generic/ondemand/serialization-inl.h for icelake: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { - if (error()) { return error(); } - return {}; + + +inline simdjson_result to_json_string(icelake::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { - if (error()) { return error(); } - return first.find_field(key); +inline simdjson_result to_json_string(icelake::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { - if (error()) { return error(); } - return first.find_field(key); + +inline simdjson_result to_json_string(icelake::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. + */ + using namespace icelake::ondemand; + icelake::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + icelake::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + icelake::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); +inline simdjson_result to_json_string(icelake::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); + +inline simdjson_result to_json_string(icelake::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { - if (error()) { return error(); } - return first[key]; +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { - if (error()) { return error(); } - return first[key]; + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result simdjson_result::get_array() noexcept { - if (error()) { return error(); } - return first.get_array(); +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result simdjson_result::get_object() noexcept { - if (error()) { return error(); } - return first.get_object(); + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return error(); } - return first.get_uint64(); + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { - if (error()) { return error(); } - return first.get_uint64_in_string(); +} // namespace simdjson + +namespace simdjson { namespace icelake { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { - if (error()) { return error(); } - return first.get_int64_in_string(); +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { - if (error()) { return error(); } - return first.get_double_in_string(); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(allow_replacement); +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } -template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(receiver, allow_replacement); +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { - if (error()) { return error(); } - return first.get_wobbly_string(); +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { - if (error()) { return error(); } - return first.is_null(); +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } +#endif -template simdjson_inline simdjson_result simdjson_result::get() noexcept { - if (error()) { return error(); } - return first.get(); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } -template simdjson_inline error_code simdjson_result::get(T &out) noexcept { - if (error()) { return error(); } - return first.get(out); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } +#endif +}}} // namespace simdjson::icelake::ondemand -template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { - if (error()) { return error(); } - return std::move(first); +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for icelake */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ } -template<> simdjson_inline error_code simdjson_result::get(fallback::ondemand::value &out) noexcept { - if (error()) { return error(); } - out = first; - return SUCCESS; + +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); } -simdjson_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; } -simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); + +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? + *(position+1) - *(position) + : *(position+2) - *(position); } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; } -#if SIMDJSON_EXCEPTIONS -simdjson_inline simdjson_result::operator fallback::ondemand::array() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); } -simdjson_inline simdjson_result::operator fallback::ondemand::object() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; } -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; } -simdjson_inline simdjson_result::operator fallback::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; } -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; } -#endif - -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; } - -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { - if (error()) { return error(); } - return first.raw_json(); +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; } -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); -} +} // namespace ondemand +} // namespace icelake +} // namespace simdjson -simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { - if (error()) { return error(); } - return first.current_depth(); -} +namespace simdjson { -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); -} +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H -/* end file simdjson/generic/ondemand/value-inl.h for fallback */ -/* including simdjson/generic/ondemand/value_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/value_iterator-inl.h for fallback */ +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/token_iterator-inl.h for icelake */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for icelake */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -45284,13 +75717,13 @@ simdjson_inline simdjson_result simdjson_result value_iterator::find_ } else if (!is_open()) { #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. - // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif @@ -45527,7 +75960,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_ #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. - // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif @@ -45869,7 +76302,7 @@ simdjson_inline simdjson_result value_iterator::get_number() noexcept { } simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("is_root_integer"); uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -45884,8 +76317,8 @@ simdjson_inline simdjson_result value_iterator::is_root_integer(bool check return answer; } -simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { - auto max_len = peek_start_length(); +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); auto json = peek_root_scalar("number"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest @@ -45893,7 +76326,12 @@ simdjson_inline simdjson_result value_iterator::get_root_ uint8_t tmpbuf[1074+8+1+1]; tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); return NUMBER_ERROR; } auto answer = numberparsing::get_number_type(tmpbuf); @@ -45901,15 +76339,21 @@ simdjson_inline simdjson_result value_iterator::get_root_ return answer; } simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("number"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. uint8_t tmpbuf[1074+8+1+1]; tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); return NUMBER_ERROR; } number num; @@ -45941,7 +76385,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iter return raw_json_string(json+1); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("uint64"); uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -45957,7 +76401,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::g return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("uint64"); uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -45973,7 +76417,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::g return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -45990,7 +76434,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::ge return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -46007,7 +76451,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::ge return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest @@ -46027,7 +76471,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest @@ -46045,27 +76489,33 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); if(result) { // we have something that looks like a null. if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("null"); + } else if (json[0] == 'n') { + return incorrect_type_error("Not a null but starts with n"); } return result; } @@ -46131,6 +76581,9 @@ simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { return _json_iter->peek_length(start_position()); } +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); +} simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); @@ -46340,59 +76793,63 @@ simdjson_inline error_code value_iterator::report_error(error_code error, const } } // namespace ondemand -} // namespace fallback +} // namespace icelake } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::value_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/value_iterator-inl.h for fallback */ -/* end file simdjson/generic/ondemand/amalgamated.h for fallback */ -/* including simdjson/fallback/end.h: #include "simdjson/fallback/end.h" */ -/* begin file simdjson/fallback/end.h */ +/* end file simdjson/generic/ondemand/value_iterator-inl.h for icelake */ + + +/* end file simdjson/generic/ondemand/amalgamated.h for icelake */ +/* including simdjson/icelake/end.h: #include "simdjson/icelake/end.h" */ +/* begin file simdjson/icelake/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -/* undefining SIMDJSON_IMPLEMENTATION from "fallback" */ -#undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/fallback/end.h */ +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_UNTARGET_REGION +#endif -#endif // SIMDJSON_FALLBACK_ONDEMAND_H -/* end file simdjson/fallback/ondemand.h */ -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) -/* including simdjson/haswell/ondemand.h: #include "simdjson/haswell/ondemand.h" */ -/* begin file simdjson/haswell/ondemand.h */ -#ifndef SIMDJSON_HASWELL_ONDEMAND_H -#define SIMDJSON_HASWELL_ONDEMAND_H +/* undefining SIMDJSON_IMPLEMENTATION from "icelake" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/icelake/end.h */ -/* including simdjson/haswell/begin.h: #include "simdjson/haswell/begin.h" */ -/* begin file simdjson/haswell/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "haswell" */ -#define SIMDJSON_IMPLEMENTATION haswell +#endif // SIMDJSON_ICELAKE_ONDEMAND_H +/* end file simdjson/icelake/ondemand.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(ppc64) +/* including simdjson/ppc64/ondemand.h: #include "simdjson/ppc64/ondemand.h" */ +/* begin file simdjson/ppc64/ondemand.h */ +#ifndef SIMDJSON_PPC64_ONDEMAND_H +#define SIMDJSON_PPC64_ONDEMAND_H -/* including simdjson/haswell/base.h: #include "simdjson/haswell/base.h" */ -/* begin file simdjson/haswell/base.h */ -#ifndef SIMDJSON_HASWELL_BASE_H -#define SIMDJSON_HASWELL_BASE_H +/* including simdjson/ppc64/begin.h: #include "simdjson/ppc64/begin.h" */ +/* begin file simdjson/ppc64/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "ppc64" */ +#define SIMDJSON_IMPLEMENTATION ppc64 +/* including simdjson/ppc64/base.h: #include "simdjson/ppc64/base.h" */ +/* begin file simdjson/ppc64/base.h */ +#ifndef SIMDJSON_PPC64_BASE_H +#define SIMDJSON_PPC64_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL namespace simdjson { /** - * Implementation for Haswell (Intel AVX2). + * Implementation for ALTIVEC (PPC64). */ -namespace haswell { +namespace ppc64 { class implementation; @@ -46403,84 +76860,48 @@ template struct simd8x64; } // namespace simd } // unnamed namespace -} // namespace haswell +} // namespace ppc64 } // namespace simdjson -#endif // SIMDJSON_HASWELL_BASE_H -/* end file simdjson/haswell/base.h */ -/* including simdjson/haswell/intrinsics.h: #include "simdjson/haswell/intrinsics.h" */ -/* begin file simdjson/haswell/intrinsics.h */ -#ifndef SIMDJSON_HASWELL_INTRINSICS_H -#define SIMDJSON_HASWELL_INTRINSICS_H +#endif // SIMDJSON_PPC64_BASE_H +/* end file simdjson/ppc64/base.h */ +/* including simdjson/ppc64/intrinsics.h: #include "simdjson/ppc64/intrinsics.h" */ +/* begin file simdjson/ppc64/intrinsics.h */ +#ifndef SIMDJSON_PPC64_INTRINSICS_H +#define SIMDJSON_PPC64_INTRINSICS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#if SIMDJSON_VISUAL_STUDIO -// under clang within visual studio, this will include -#include // visual studio or clang -#else -#include // elsewhere -#endif // SIMDJSON_VISUAL_STUDIO - -#if SIMDJSON_CLANG_VISUAL_STUDIO -/** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - * e.g., if __AVX2__ is set... in turn, we normally set these - * macros by compiling against the corresponding architecture - * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole - * software with these advanced instructions. In simdjson, we - * want to compile the whole program for a generic target, - * and only target our specific kernels. As a workaround, - * we directly include the needed headers. These headers would - * normally guard against such usage, but we carefully included - * (or ) before, so the headers - * are fooled. - */ -#include // for _blsr_u64 -#include // for __lzcnt64 -#include // for most things (AVX2, AVX512, _popcnt64) -#include -#include -#include -#include -#include // for _mm_clmulepi64_si128 -// unfortunately, we may not get _blsr_u64, but, thankfully, clang -// has it as a macro. -#ifndef _blsr_u64 -// we roll our own -#define _blsr_u64(n) ((n - 1) & n) -#endif // _blsr_u64 -#endif // SIMDJSON_CLANG_VISUAL_STUDIO - -static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); +// This should be the correct header whether +// you use visual studio or other compilers. +#include -#endif // SIMDJSON_HASWELL_INTRINSICS_H -/* end file simdjson/haswell/intrinsics.h */ +// These are defined by altivec.h in GCC toolchain, it is safe to undef them. +#ifdef bool +#undef bool +#endif -#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL -SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") +#ifdef vector +#undef vector #endif -/* including simdjson/haswell/bitmanipulation.h: #include "simdjson/haswell/bitmanipulation.h" */ -/* begin file simdjson/haswell/bitmanipulation.h */ -#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H -#define SIMDJSON_HASWELL_BITMANIPULATION_H +static_assert(sizeof(__vector unsigned char) <= simdjson::SIMDJSON_PADDING, "insufficient padding for ppc64"); + +#endif // SIMDJSON_PPC64_INTRINSICS_H +/* end file simdjson/ppc64/intrinsics.h */ +/* including simdjson/ppc64/bitmanipulation.h: #include "simdjson/ppc64/bitmanipulation.h" */ +/* begin file simdjson/ppc64/bitmanipulation.h */ +#ifndef SIMDJSON_PPC64_BITMANIPULATION_H +#define SIMDJSON_PPC64_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmask.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { namespace { // We sometimes call trailing_zero on inputs that are zero, @@ -46493,43 +76914,52 @@ SIMDJSON_NO_SANITIZE_UNDEFINED SIMDJSON_NO_SANITIZE_MEMORY simdjson_inline int trailing_zeroes(uint64_t input_num) { #if SIMDJSON_REGULAR_VISUAL_STUDIO - return (int)_tzcnt_u64(input_num); -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - //////// - // You might expect the next line to be equivalent to - // return (int)_tzcnt_u64(input_num); - // but the generated code differs and might be less efficient? - //////// + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO return __builtin_ctzll(input_num); #endif // SIMDJSON_REGULAR_VISUAL_STUDIO } /* result might be undefined when input_num is zero */ simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return _blsr_u64(input_num); + return input_num & (input_num - 1); } /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { - return int(_lzcnt_u64(input_num)); +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO } #if SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { +simdjson_inline int count_ones(uint64_t input_num) { // note: we do not support legacy 32-bit Windows in this kernel - return __popcnt64(input_num);// Visual Studio wants two underscores + return __popcnt64(input_num); // Visual Studio wants two underscores } #else -simdjson_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); +simdjson_inline int count_ones(uint64_t input_num) { + return __builtin_popcountll(input_num); } #endif simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { + uint64_t *result) { #if SIMDJSON_REGULAR_VISUAL_STUDIO - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); + *result = value1 + value2; + return *result < value1; #else return __builtin_uaddll_overflow(value1, value2, reinterpret_cast(result)); @@ -46537,92 +76967,112 @@ simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, } } // unnamed namespace -} // namespace haswell +} // namespace ppc64 } // namespace simdjson -#endif // SIMDJSON_HASWELL_BITMANIPULATION_H -/* end file simdjson/haswell/bitmanipulation.h */ -/* including simdjson/haswell/bitmask.h: #include "simdjson/haswell/bitmask.h" */ -/* begin file simdjson/haswell/bitmask.h */ -#ifndef SIMDJSON_HASWELL_BITMASK_H -#define SIMDJSON_HASWELL_BITMASK_H +#endif // SIMDJSON_PPC64_BITMANIPULATION_H +/* end file simdjson/ppc64/bitmanipulation.h */ +/* including simdjson/ppc64/bitmask.h: #include "simdjson/ppc64/bitmask.h" */ +/* begin file simdjson/ppc64/bitmask.h */ +#ifndef SIMDJSON_PPC64_BITMASK_H +#define SIMDJSON_PPC64_BITMASK_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { namespace { // -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is +// encountered. // // For example, prefix_xor(00100100) == 00011100 // -simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { - // There should be no such thing with a processor supporting avx2 - // but not clmul. - __m128i all_ones = _mm_set1_epi8('\xFF'); - __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); - return _mm_cvtsi128_si64(result); +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + // You can use the version below, however gcc sometimes miscompiles + // vec_pmsum_be, it happens somewhere around between 8 and 9th version. + // The performance boost was not noticeable, falling back to a usual + // implementation. + // __vector unsigned long long all_ones = {~0ull, ~0ull}; + // __vector unsigned long long mask = {bitmask, 0}; + // // Clang and GCC return different values for pmsum for ull so cast it to one. + // // Generally it is not specified by ALTIVEC ISA what is returned by + // // vec_pmsum_be. + // #if defined(__LITTLE_ENDIAN__) + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); + // #else + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); + // #endif + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; } } // unnamed namespace -} // namespace haswell +} // namespace ppc64 } // namespace simdjson -#endif // SIMDJSON_HASWELL_BITMASK_H -/* end file simdjson/haswell/bitmask.h */ -/* including simdjson/haswell/numberparsing_defs.h: #include "simdjson/haswell/numberparsing_defs.h" */ -/* begin file simdjson/haswell/numberparsing_defs.h */ -#ifndef SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H -#define SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#endif +/* end file simdjson/ppc64/bitmask.h */ +/* including simdjson/ppc64/numberparsing_defs.h: #include "simdjson/ppc64/numberparsing_defs.h" */ +/* begin file simdjson/ppc64/numberparsing_defs.h */ +#ifndef SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +#define SIMDJSON_PPC64_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/intrinsics.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include + +#if defined(__linux__) +#include +#elif defined(__FreeBSD__) +#include +#endif + namespace simdjson { -namespace haswell { +namespace ppc64 { namespace numberparsing { +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ /** @private */ static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - // this actually computes *16* values so we are being wasteful. - const __m128i ascii0 = _mm_set1_epi8('0'); - const __m128i mul_1_10 = - _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); - const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); - const __m128i mul_1_10000 = - _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); - const __m128i input = _mm_sub_epi8( - _mm_loadu_si128(reinterpret_cast(chars)), ascii0); - const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); - const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); - const __m128i t3 = _mm_packus_epi32(t2, t2); - const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); - return _mm_cvtsi128_si32( - t4); // only captures the sum of the first 8 digits, drop the rest + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); +#ifdef __BIG_ENDIAN__ +#if defined(__linux__) + val = bswap_64(val); +#elif defined(__FreeBSD__) + val = bswap64(val); +#endif +#endif + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); } /** @private */ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { internal::value128 answer; #if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 +#if SIMDJSON_IS_ARM64 // ARM64 has native support for 64-bit multiplications, no need to emultate answer.high = __umulh(value1, value2); answer.low = value1 * value2; #else answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 +#endif // SIMDJSON_IS_ARM64 #else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); @@ -46632,401 +77082,507 @@ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t } } // namespace numberparsing -} // namespace haswell +} // namespace ppc64 } // namespace simdjson +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 +#else #define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif +#endif -#endif // SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H -/* end file simdjson/haswell/numberparsing_defs.h */ -/* including simdjson/haswell/simd.h: #include "simdjson/haswell/simd.h" */ -/* begin file simdjson/haswell/simd.h */ -#ifndef SIMDJSON_HASWELL_SIMD_H -#define SIMDJSON_HASWELL_SIMD_H +#endif // SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +/* end file simdjson/ppc64/numberparsing_defs.h */ +/* including simdjson/ppc64/simd.h: #include "simdjson/ppc64/simd.h" */ +/* begin file simdjson/ppc64/simd.h */ +#ifndef SIMDJSON_PPC64_SIMD_H +#define SIMDJSON_PPC64_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include + namespace simdjson { -namespace haswell { +namespace ppc64 { namespace { namespace simd { - // Forward-declared so they can be used by splat and friends. - template - struct base { - __m256i value; - - // Zero constructor - simdjson_inline base() : value{__m256i()} {} - - // Conversion from SIMD register - simdjson_inline base(const __m256i _value) : value(_value) {} +using __m128i = __vector unsigned char; - // Conversion to SIMD register - simdjson_inline operator const __m256i&() const { return this->value; } - simdjson_inline operator __m256i&() { return this->value; } +template struct base { + __m128i value; - // Bit operations - simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } - simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } - simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } - simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } - simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } - }; + // Zero constructor + simdjson_inline base() : value{__m128i()} {} - // Forward-declared so they can be used by splat and friends. - template - struct simd8; + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} - template> - struct base8: base> { - typedef uint32_t bitmask_t; - typedef uint64_t bitmask2_t; + // Conversion to SIMD register + simdjson_inline operator const __m128i &() const { + return this->value; + } + simdjson_inline operator __m128i &() { return this->value; } - simdjson_inline base8() : base>() {} - simdjson_inline base8(const __m256i _value) : base>(_value) {} + // Bit operations + simdjson_inline Child operator|(const Child other) const { + return vec_or(this->value, (__m128i)other); + } + simdjson_inline Child operator&(const Child other) const { + return vec_and(this->value, (__m128i)other); + } + simdjson_inline Child operator^(const Child other) const { + return vec_xor(this->value, (__m128i)other); + } + simdjson_inline Child bit_andnot(const Child other) const { + return vec_andc(this->value, (__m128i)other); + } + simdjson_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdjson_inline Child &operator&=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdjson_inline Child &operator^=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } +}; - friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } +template > +struct base8 : base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; - static const int SIZE = sizeof(base::value); + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} - template - simdjson_inline simd8 prev(const simd8 prev_chunk) const { - return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); - } - }; + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { + return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); + } - // SIMD byte mask type (returned by things like eq and gt) - template<> - struct simd8: base8 { - static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } + static const int SIZE = sizeof(base>::value); - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m256i _value) : base8(_value) {} - // Splat constructor - simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + template + simdjson_inline simd8 prev(simd8 prev_chunk) const { + __m128i chunk = this->value; +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve(this->value); + prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); +#endif + chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve((__m128i)chunk); +#endif + return chunk; + } +}; - simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } - simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } - simdjson_inline simd8 operator~() const { return *this ^ true; } - }; +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdjson_inline simd8 splat(bool _value) { + return (__m128i)vec_splats((unsigned char)(-(!!_value))); + } - template - struct base8_numeric: base8 { - static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } - static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } - static simdjson_inline simd8 load(const T values[32]) { - return _mm256_loadu_si256(reinterpret_cast(values)); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_inline simd8 repeat_16( - T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) + : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) + : base8(splat(_value)) {} - simdjson_inline base8_numeric() : base8() {} - simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} + simdjson_inline int to_bitmask() const { + __vector unsigned long long result; + const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, + 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; - // Store to array - simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } + result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, + (__m128i)perm_mask)); +#ifdef __LITTLE_ENDIAN__ + return static_cast(result[1]); +#else + return static_cast(result[0]); +#endif + } + simdjson_inline bool any() const { + return !vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline simd8 operator~() const { + return this->value ^ (__m128i)splat(true); + } +}; - // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } - simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } +template struct base8_numeric : base8 { + static simdjson_inline simd8 splat(T value) { + (void)value; + return (__m128i)vec_splats(value); + } + static simdjson_inline simd8 zero() { return splat(0); } + static simdjson_inline simd8 load(const T values[16]) { + return (__m128i)(vec_vsx_ld(0, reinterpret_cast(values))); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15); + } - // Override to distinguish from bool version - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) + : base8(_value) {} - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) - template - simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm256_shuffle_epi8(lookup_table, *this); - } + // Store to array + simdjson_inline void store(T dst[16]) const { + vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); + } - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). - // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes - // get written. - // Design consideration: it seems like a function with the - // signature simd8 compress(uint32_t mask) would be - // sensible, but the AVX ISA makes this kind of approach difficult. - template - simdjson_inline void compress(uint32_t mask, L * output) const { - using internal::thintable_epi8; - using internal::BitsSetTable256mul2; - using internal::pshufb_combine_table; - // this particular implementation was inspired by work done by @animetosho - // we do it in four steps, first 8 bytes and then second 8 bytes... - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits - uint8_t mask3 = uint8_t(mask >> 16); // ... - uint8_t mask4 = uint8_t(mask >> 24); // ... - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], - thintable_epi8[mask2], thintable_epi8[mask1]); - // we increment by 0x08 the second half of the mask and so forth - shufmask = - _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, - 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); - // this is the version "nearly pruned" - __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); - // we still need to put the pieces back together. - // we compute the popcount of the first words: - int pop1 = BitsSetTable256mul2[mask1]; - int pop3 = BitsSetTable256mul2[mask3]; + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } - // then load the corresponding mask - // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. - __m256i v256 = _mm256_castsi128_si256( - _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); - __m256i compactmask = _mm256_insertf128_si256(v256, - _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); - __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); - // We just need to write out the result. - // This is the tricky bit that is hard to do - // if we want to return a SIMD register, since there - // is no single-instruction approach to recombine - // the two 128-bit lanes with an offset. - __m128i v128; - v128 = _mm256_castsi256_si128(almostthere); - _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); - v128 = _mm256_extractf128_si256(almostthere, 1); - _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); - } + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { + return (__m128i)((__m128i)this->value + (__m128i)other); + } + simdjson_inline simd8 operator-(const simd8 other) const { + return (__m128i)((__m128i)this->value - (__m128i)other); + } + simdjson_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdjson_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *static_cast *>(this); + } - template - simdjson_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, replace15 - )); - } - }; + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); + } - // Signed bytes - template<> - struct simd8 : base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, - int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, - int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 - ) : simd8(_mm256_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v16,v17,v18,v19,v20,v21,v22,v23, - v24,v25,v26,v27,v28,v29,v30,v31 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted + // as a bitset). Passing a 0 value for mask would be equivalent to writing out + // every byte to output. Only the first 16 - count_ones(mask) bytes of the + // result are significant but 16 bytes get written. Design consideration: it + // seems like a function with the signature simd8 compress(uint32_t mask) + // would be sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L *output) const { + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + using internal::thintable_epi8; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. +#ifdef __LITTLE_ENDIAN__ + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask1], thintable_epi8[mask2]}; +#else + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask2], thintable_epi8[mask1]}; + shufmask = (__m128i)vec_reve((__m128i)shufmask); +#endif + // we increment by 0x08 the second half of the mask + shufmask = ((__m128i)shufmask) + + ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); - // Order-sensitive comparisons - simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } - simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } - simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } - }; + // this is the version "nearly pruned" + __m128i pruned = vec_perm(this->value, this->value, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + vec_vsx_ld(0, reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); + vec_vsx_st(answer, 0, reinterpret_cast<__m128i *>(output)); + } - // Unsigned bytes - template<> - struct simd8: base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, - uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, - uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 - ) : simd8(_mm256_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v16,v17,v18,v19,v20,v21,v22,v23, - v24,v25,v26,v27,v28,v29,v30,v31 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } + template + simdjson_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; - // Saturated math - simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } - simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } +// Signed bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, + v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } - // Order-specific operations - simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + // Order-sensitive comparisons + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return (__m128i)vec_cmpgt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return (__m128i)vec_cmplt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } +}; - // Bit-specific operations - simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } - simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } - simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } - simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } - simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } - template - simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } - template - simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } - // Get one of the bits and make a bitmask out of it. - // e.g. value.get_bit<7>() gets the high bit - template - simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } - }; +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } - template - struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; + // Saturated math + simdjson_inline simd8 + saturating_add(const simd8 other) const { + return (__m128i)vec_adds(this->value, (__m128i)other); + } + simdjson_inline simd8 + saturating_sub(const simd8 other) const { + return (__m128i)vec_subs(this->value, (__m128i)other); + } - simd8x64(const simd8x64& o) = delete; // no copy allowed - simd8x64& operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed + // Order-specific operations + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max(this->value, (__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min(this->value, (__m128i)other); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 + lt_bits(const simd8 other) const { + return other.saturating_sub(*this); + } + simdjson_inline simd8 + operator<=(const simd8 other) const { + return other.max_val(*this) == other; + } + simdjson_inline simd8 + operator>=(const simd8 other) const { + return other.min_val(*this) == other; + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} - simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { + return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); + } + simdjson_inline simd8 bits_not_set(simd8 bits) const { + return (*this & bits).bits_not_set(); + } + simdjson_inline simd8 any_bits_set() const { + return ~this->bits_not_set(); + } + simdjson_inline simd8 any_bits_set(simd8 bits) const { + return ~this->bits_not_set(bits); + } + simdjson_inline bool bits_not_set_anywhere() const { + return vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + return vec_all_eq(vec_and(this->value, (__m128i)bits), + (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { + return !bits_not_set_anywhere(bits); + } + template simdjson_inline simd8 shr() const { + return simd8( + (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); + } + template simdjson_inline simd8 shl() const { + return simd8( + (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); + } +}; - simdjson_inline uint64_t compress(uint64_t mask, T * output) const { - uint32_t mask1 = uint32_t(mask); - uint32_t mask2 = uint32_t(mask >> 32); - this->chunks[0].compress(mask1, output); - this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); - return 64 - count_ones(mask); - } +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, + "PPC64 kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; - simdjson_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr+sizeof(simd8)*0); - this->chunks[1].store(ptr+sizeof(simd8)*1); - } + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed - simdjson_inline uint64_t to_bitmask() const { - uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r_hi = this->chunks[1].to_bitmask(); - return r_lo | (r_hi << 32); - } + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) + : chunks{simd8::load(ptr), simd8::load(ptr + 16), + simd8::load(ptr + 32), simd8::load(ptr + 48)} {} - simdjson_inline simd8 reduce_or() const { - return this->chunks[0] | this->chunks[1]; - } + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0); + this->chunks[1].store(ptr + sizeof(simd8) * 1); + this->chunks[2].store(ptr + sizeof(simd8) * 2); + this->chunks[3].store(ptr + sizeof(simd8) * 3); + } - simdjson_inline simd8x64 bit_or(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] | mask, - this->chunks[1] | mask - ); - } + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } - simdjson_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] == mask, - this->chunks[1] == mask - ).to_bitmask(); - } + simdjson_inline uint64_t compress(uint64_t mask, T *output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), + output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), + output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), + output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } - simdjson_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64( - this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1] - ).to_bitmask(); - } + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } - simdjson_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] <= mask, - this->chunks[1] <= mask - ).to_bitmask(); - } - }; // struct simd8x64 + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } -} // namespace simd + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3]) + .to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } +}; // struct simd8x64 +} // namespace simd } // unnamed namespace -} // namespace haswell +} // namespace ppc64 } // namespace simdjson -#endif // SIMDJSON_HASWELL_SIMD_H -/* end file simdjson/haswell/simd.h */ -/* including simdjson/haswell/stringparsing_defs.h: #include "simdjson/haswell/stringparsing_defs.h" */ -/* begin file simdjson/haswell/stringparsing_defs.h */ -#ifndef SIMDJSON_HASWELL_STRINGPARSING_DEFS_H -#define SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +#endif // SIMDJSON_PPC64_SIMD_INPUT_H +/* end file simdjson/ppc64/simd.h */ +/* including simdjson/ppc64/stringparsing_defs.h: #include "simdjson/ppc64/stringparsing_defs.h" */ +/* begin file simdjson/ppc64/stringparsing_defs.h */ +#ifndef SIMDJSON_PPC64_STRINGPARSING_DEFS_H +#define SIMDJSON_PPC64_STRINGPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/simd.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/simd.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { namespace { using namespace simd; @@ -47035,46 +77591,65 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline static backslash_and_quote + copy_and_find(const uint8_t *src, uint8_t *dst); - simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } - simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + simdjson_inline bool has_quote_first() { + return ((bs_bits - 1) & quote_bits) != 0; + } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { + return trailing_zeroes(quote_bits); + } + simdjson_inline int backslash_index() { + return trailing_zeroes(bs_bits); + } uint32_t bs_bits; uint32_t quote_bits; }; // struct backslash_and_quote -simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // this can read up to 15 bytes beyond the buffer size, but we require +simdjson_inline backslash_and_quote +backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); - simd8 v(src); - // store to dest unconditionally - we can overwrite the bits we don't like later - v.store(dst); + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), + "backslash and quote finder must process fewer than " + "SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on + // PPC; therefore, we smash them together into a 64-byte mask and get the + // bitmask from there. + uint64_t bs_and_quote = + simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); return { - static_cast((v == '\\').to_bitmask()), // bs_bits - static_cast((v == '"').to_bitmask()), // quote_bits + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits }; } } // unnamed namespace -} // namespace haswell +} // namespace ppc64 } // namespace simdjson -#endif // SIMDJSON_HASWELL_STRINGPARSING_DEFS_H -/* end file simdjson/haswell/stringparsing_defs.h */ -/* end file simdjson/haswell/begin.h */ -/* including simdjson/generic/ondemand/amalgamated.h for haswell: #include "simdjson/generic/ondemand/amalgamated.h" */ -/* begin file simdjson/generic/ondemand/amalgamated.h for haswell */ +#endif // SIMDJSON_PPC64_STRINGPARSING_DEFS_H +/* end file simdjson/ppc64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/ppc64/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for ppc64: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for ppc64 */ #if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) #error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! #endif // Stuff other things depend on -/* including simdjson/generic/ondemand/base.h for haswell: #include "simdjson/generic/ondemand/base.h" */ -/* begin file simdjson/generic/ondemand/base.h for haswell */ +/* including simdjson/generic/ondemand/base.h for ppc64: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -47083,7 +77658,7 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { /** * A fast, simple, DOM-like interface that parses JSON as you use it. * @@ -47094,8 +77669,8 @@ namespace ondemand { /** Represents the depth of a JSON value (number of nested arrays/objects). */ using depth_t = int32_t; -/** @copydoc simdjson::haswell::number_type */ -using number_type = simdjson::haswell::number_type; +/** @copydoc simdjson::ppc64::number_type */ +using number_type = simdjson::ppc64::number_type; /** @private Position in the JSON buffer indexes */ using token_position = const uint32_t *; @@ -47118,13 +77693,139 @@ class value; class value_iterator; } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H -/* end file simdjson/generic/ondemand/base.h for haswell */ -/* including simdjson/generic/ondemand/value_iterator.h for haswell: #include "simdjson/generic/ondemand/value_iterator.h" */ -/* begin file simdjson/generic/ondemand/value_iterator.h for haswell */ +/* end file simdjson/generic/ondemand/base.h for ppc64 */ +/* including simdjson/generic/ondemand/deserialize.h for ppc64: #include "simdjson/generic/ondemand/deserialize.h" */ +/* begin file simdjson/generic/ondemand/deserialize.h for ppc64 */ +#if SIMDJSON_SUPPORTS_DESERIALIZATION + +#ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_ONDEMAND_DESERIALIZE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +namespace simdjson { + +namespace tag_invoke_fn_ns { +void tag_invoke(); + +struct tag_invoke_fn { + template + requires requires(Tag tag, Args &&...args) { + tag_invoke(std::forward(tag), std::forward(args)...); + } + constexpr auto operator()(Tag tag, Args &&...args) const + noexcept(noexcept(tag_invoke(std::forward(tag), + std::forward(args)...))) + -> decltype(tag_invoke(std::forward(tag), + std::forward(args)...)) { + return tag_invoke(std::forward(tag), std::forward(args)...); + } +}; +} // namespace tag_invoke_fn_ns + +inline namespace tag_invoke_ns { +inline constexpr tag_invoke_fn_ns::tag_invoke_fn tag_invoke = {}; +} // namespace tag_invoke_ns + +template +concept tag_invocable = requires(Tag tag, Args... args) { + tag_invoke(std::forward(tag), std::forward(args)...); +}; + +template +concept nothrow_tag_invocable = + tag_invocable && requires(Tag tag, Args... args) { + { + tag_invoke(std::forward(tag), std::forward(args)...) + } noexcept; + }; + +template +using tag_invoke_result = + std::invoke_result; + +template +using tag_invoke_result_t = + std::invoke_result_t; + +template using tag_t = std::decay_t; + + +struct deserialize_tag; + +/// These types are deserializable in a built-in way +template struct is_builtin_deserializable : std::false_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; + +template +concept is_builtin_deserializable_v = is_builtin_deserializable::value; + +template +concept custom_deserializable = tag_invocable; + +template +concept deserializable = custom_deserializable || is_builtin_deserializable_v; + +template +concept nothrow_custom_deserializable = nothrow_tag_invocable; + +// built-in types are noexcept and if an error happens, the value simply gets ignored and the error is returned. +template +concept nothrow_deserializable = nothrow_custom_deserializable || is_builtin_deserializable_v; + +/// Deserialize Tag +inline constexpr struct deserialize_tag { + using value_type = ppc64::ondemand::value; + using document_type = ppc64::ondemand::document; + using document_reference_type = ppc64::ondemand::document_reference; + + // Customization Point for value + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for document + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for document reference + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + +} deserialize{}; + +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_DESERIALIZE_H +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + +/* end file simdjson/generic/ondemand/deserialize.h for ppc64 */ +/* including simdjson/generic/ondemand/value_iterator.h for ppc64: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -47134,7 +77835,7 @@ class value_iterator; /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { /** @@ -47252,7 +77953,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -47262,7 +77964,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -47384,7 +78087,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -47394,7 +78098,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -47502,6 +78207,7 @@ class value_iterator { simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; simdjson_inline const uint8_t *peek_start() const noexcept; simdjson_inline uint32_t peek_start_length() const noexcept; + simdjson_inline uint32_t peek_root_length() const noexcept; /** * The general idea of the advance_... methods and the peek_* methods @@ -47596,18 +78302,19 @@ class value_iterator { friend class object; friend class array; friend class value; + friend class field; }; // value_iterator } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public haswell::implementation_simdjson_result_base { +struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(haswell::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(ppc64::ondemand::value_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; @@ -47615,9 +78322,9 @@ struct simdjson_result : public haswell::impl } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H -/* end file simdjson/generic/ondemand/value_iterator.h for haswell */ -/* including simdjson/generic/ondemand/value.h for haswell: #include "simdjson/generic/ondemand/value.h" */ -/* begin file simdjson/generic/ondemand/value.h for haswell */ +/* end file simdjson/generic/ondemand/value_iterator.h for ppc64 */ +/* including simdjson/generic/ondemand/value.h for ppc64: #include "simdjson/generic/ondemand/value.h" */ +/* begin file simdjson/generic/ondemand/value.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -47625,12 +78332,15 @@ struct simdjson_result : public haswell::impl /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include + namespace simdjson { -namespace haswell { -namespace ondemand { +namespace ppc64 { +namespace ondemand { /** * An ephemeral JSON value returned during iteration. It is only valid for as long as you do * not access more data in the JSON document. @@ -47655,15 +78365,21 @@ class value { * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ - template simdjson_inline simdjson_result get() noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); + template + simdjson_inline simdjson_result get() +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; } + /** * Get this value as the given type. * @@ -47673,7 +78389,38 @@ class value { * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ - template simdjson_inline error_code get(T &out) noexcept; + template + simdjson_inline error_code get(T &out) +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + #if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif + } /** * Cast this JSON value to an array. @@ -47749,6 +78496,17 @@ class value { * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * + * In some instances, you may want to allow replacement of invalid Unicode sequences. + * You may do so by passing the allow_replacement parameter as true. In the following + * example, the string "431924697b\udff0L\u0001Y" is not valid Unicode. By passing true + * to get_string, we allow the replacement of the invalid Unicode sequences with the Unicode + * replacement character (U+FFFD). + * + * simdjson::ondemand::parser parser; + * auto json = R"({"deviceId":"431924697b\udff0L\u0001Y"})"_padded; + * simdjson::ondemand::document doc = parser.iterate(json); + * auto view = doc["deviceId"].get_string(true); + * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. @@ -47814,6 +78572,17 @@ class value { simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.). + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); /** * Cast this JSON value to an array. * @@ -47977,7 +78746,7 @@ class value { * that only one field is returned. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field as not there when they are not in order). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. @@ -47989,6 +78758,7 @@ class value { simdjson_inline simdjson_result operator[](std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_result operator[](int) noexcept = delete; /** * Get the type of this JSON value. It does not validate or consume the value. @@ -48013,6 +78783,13 @@ class value { * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the value is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; /** * Checks whether the value is a negative number. @@ -48045,10 +78822,12 @@ class value { * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have - * an integer greater or equal to 9223372036854775808 + * an integer greater or equal to 9223372036854775808. * get_number_type() is number_type::signed_integer if we have an - * integer that is less than 9223372036854775808 - * Otherwise, get_number_type() has value number_type::floating_point_number + * integer that is less than 9223372036854775808. + * get_number_type() is number_type::big_integer for integers that do not fit in 64 bits, + * in which case the digit_count is set to the length of the big integer string. + * Otherwise, get_number_type() has value number_type::floating_point_number. * * This function requires processing the number string, but it is expected * to be faster than get_number().get_number_type() because it is does not @@ -48075,6 +78854,8 @@ class value { * You can recover the value by calling number.get_uint64() and you * have that number.is_uint64() is true. * + * For integers that do not fit in 64 bits, the function returns BIGINT_ERROR error code. + * * Otherwise, number.get_number_type() has value number_type::floating_point_number * and we have a binary64 number. * You can recover the value by calling number.get_double() and you @@ -48090,7 +78871,6 @@ class value { */ simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; - /** * Get the raw JSON for this token. * @@ -48188,6 +78968,20 @@ class value { */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + + protected: /** * Create a value. @@ -48227,23 +79021,24 @@ class value { friend class object; friend struct simdjson_result; friend struct simdjson_result; + friend class field; }; } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public haswell::implementation_simdjson_result_base { +struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(haswell::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(ppc64::ondemand::value &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; - simdjson_inline simdjson_result get_array() noexcept; - simdjson_inline simdjson_result get_object() noexcept; + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; @@ -48255,7 +79050,7 @@ struct simdjson_result : public haswell::implementatio template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; - simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result is_null() noexcept; @@ -48264,20 +79059,22 @@ struct simdjson_result : public haswell::implementatio template simdjson_inline error_code get(T &out) noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_inline operator haswell::ondemand::array() noexcept(false); - simdjson_inline operator haswell::ondemand::object() noexcept(false); + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator ppc64::ondemand::array() noexcept(false); + simdjson_inline operator ppc64::ondemand::object() noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); - simdjson_inline operator haswell::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator ppc64::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; - simdjson_inline simdjson_result at(size_t index) noexcept; - simdjson_inline simdjson_result begin() & noexcept; - simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). @@ -48299,9 +79096,9 @@ struct simdjson_result : public haswell::implementatio * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ - simdjson_inline simdjson_result find_field(std::string_view key) noexcept; - /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ - simdjson_inline simdjson_result find_field(const char *key) noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. @@ -48317,18 +79114,19 @@ struct simdjson_result : public haswell::implementatio * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field as not there when they are not in order). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ - simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; - /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; - /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_inline simdjson_result operator[](std::string_view key) noexcept; - /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_result operator[](int) noexcept = delete; /** * Get the type of this JSON value. @@ -48337,12 +79135,13 @@ struct simdjson_result : public haswell::implementatio * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). */ - simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; - simdjson_inline simdjson_result get_number_type() noexcept; - simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; @@ -48352,15 +79151,16 @@ struct simdjson_result : public haswell::implementatio simdjson_inline simdjson_result current_location() noexcept; /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ simdjson_inline simdjson_result current_depth() const noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H -/* end file simdjson/generic/ondemand/value.h for haswell */ -/* including simdjson/generic/ondemand/logger.h for haswell: #include "simdjson/generic/ondemand/logger.h" */ -/* begin file simdjson/generic/ondemand/logger.h for haswell */ +/* end file simdjson/generic/ondemand/value.h for ppc64 */ +/* including simdjson/generic/ondemand/logger.h for ppc64: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -48369,7 +79169,7 @@ struct simdjson_result : public haswell::implementatio /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { // Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical @@ -48415,13 +79215,13 @@ static inline void log_error(const value_iterator &iter, const char *error, cons } // namespace logger } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H -/* end file simdjson/generic/ondemand/logger.h for haswell */ -/* including simdjson/generic/ondemand/token_iterator.h for haswell: #include "simdjson/generic/ondemand/token_iterator.h" */ -/* begin file simdjson/generic/ondemand/token_iterator.h for haswell */ +/* end file simdjson/generic/ondemand/logger.h for ppc64 */ +/* including simdjson/generic/ondemand/token_iterator.h for ppc64: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -48432,7 +79232,7 @@ static inline void log_error(const value_iterator &iter, const char *error, cons /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { /** @@ -48471,7 +79271,7 @@ class token_iterator { * 1 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** @@ -48501,7 +79301,14 @@ class token_iterator { * @param position The position of the token. */ simdjson_inline uint32_t peek_length(token_position position) const noexcept; - + /** + * Get the maximum length of the JSON text for a root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token (start of the document). + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; /** * Return the current index. */ @@ -48556,15 +79363,15 @@ class token_iterator { }; } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public haswell::implementation_simdjson_result_base { +struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(haswell::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(ppc64::ondemand::token_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private @@ -48573,9 +79380,9 @@ struct simdjson_result : public haswell::impl } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H -/* end file simdjson/generic/ondemand/token_iterator.h for haswell */ -/* including simdjson/generic/ondemand/json_iterator.h for haswell: #include "simdjson/generic/ondemand/json_iterator.h" */ -/* begin file simdjson/generic/ondemand/json_iterator.h for haswell */ +/* end file simdjson/generic/ondemand/token_iterator.h for ppc64 */ +/* including simdjson/generic/ondemand/json_iterator.h for ppc64: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -48586,7 +79393,7 @@ struct simdjson_result : public haswell::impl /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { /** @@ -48724,7 +79531,7 @@ class json_iterator { * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used ... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** @@ -48752,7 +79559,7 @@ class json_iterator { * @param position The position of the token to retrieve. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used ... */ simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** @@ -48763,13 +79570,21 @@ class json_iterator { * @param position The position of the token to retrieve. */ simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; /** * Get the JSON text for the last token in the document. * * This is not null-terminated; it is a view into the JSON. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used ... */ simdjson_inline const uint8_t *peek_last() const noexcept; @@ -48863,6 +79678,9 @@ class json_iterator { inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. @@ -48878,6 +79696,7 @@ class json_iterator { friend class raw_json_string; friend class parser; friend class value_iterator; + friend class field; template friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; template @@ -48885,15 +79704,15 @@ class json_iterator { }; // json_iterator } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public haswell::implementation_simdjson_result_base { +struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(haswell::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(ppc64::ondemand::json_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -48902,9 +79721,9 @@ struct simdjson_result : public haswell::imple } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H -/* end file simdjson/generic/ondemand/json_iterator.h for haswell */ -/* including simdjson/generic/ondemand/json_type.h for haswell: #include "simdjson/generic/ondemand/json_type.h" */ -/* begin file simdjson/generic/ondemand/json_type.h for haswell */ +/* end file simdjson/generic/ondemand/json_iterator.h for ppc64 */ +/* including simdjson/generic/ondemand/json_type.h for ppc64: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -48915,7 +79734,7 @@ struct simdjson_result : public haswell::imple /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { /** @@ -49048,15 +79867,15 @@ inline std::ostream& operator<<(std::ostream& out, simdjson_result &t #endif } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public haswell::implementation_simdjson_result_base { +struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(haswell::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(ppc64::ondemand::json_type &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private @@ -49065,9 +79884,9 @@ struct simdjson_result : public haswell::implement } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H -/* end file simdjson/generic/ondemand/json_type.h for haswell */ -/* including simdjson/generic/ondemand/raw_json_string.h for haswell: #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* begin file simdjson/generic/ondemand/raw_json_string.h for haswell */ +/* end file simdjson/generic/ondemand/json_type.h for ppc64 */ +/* including simdjson/generic/ondemand/raw_json_string.h for ppc64: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -49077,7 +79896,7 @@ struct simdjson_result : public haswell::implement /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { /** @@ -49253,30 +80072,30 @@ simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_js } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public haswell::implementation_simdjson_result_base { +struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(haswell::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(ppc64::ondemand::raw_json_string &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; - simdjson_inline simdjson_warn_unused simdjson_result unescape(haswell::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; - simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(haswell::ondemand::json_iterator &iter) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(ppc64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(ppc64::ondemand::json_iterator &iter) const noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H -/* end file simdjson/generic/ondemand/raw_json_string.h for haswell */ -/* including simdjson/generic/ondemand/parser.h for haswell: #include "simdjson/generic/ondemand/parser.h" */ -/* begin file simdjson/generic/ondemand/parser.h for haswell */ +/* end file simdjson/generic/ondemand/raw_json_string.h for ppc64 */ +/* including simdjson/generic/ondemand/parser.h for ppc64: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -49288,12 +80107,12 @@ struct simdjson_result : public haswell::imp #include namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -49363,6 +80182,22 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * + * ### std::string references + * + * If you pass a mutable std::string reference (std::string&), the parser will seek to extend + * its capacity to SIMDJSON_PADDING bytes beyond the end of the string. + * + * Whenever you pass an std::string reference, the parser will access the bytes beyond the end of + * the string but before the end of the allocated memory (std::string::capacity()). + * If you are using a sanitizer that checks for reading uninitialized bytes or std::string's + * container-overflow checks, you may encounter sanitizer warnings. + * You can safely ignore these warnings. Or you can call simdjson::pad(std::string&) to pad the + * string with SIMDJSON_PADDING spaces: this function returns a simdjson::padding_string_view + * which can be be passed to the parser's iterate function: + * + * std::string json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"; + * document doc = parser.iterate(simdjson::pad(json)); + * * @param json The JSON to parse. * @param len The length of the JSON. * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). @@ -49377,6 +80212,9 @@ class parser { * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ @@ -49521,9 +80359,9 @@ class parser { simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). */ - simdjson_inline size_t capacity() const noexcept; + simdjson_pure simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ - simdjson_inline size_t max_capacity() const noexcept; + simdjson_pure simdjson_inline size_t max_capacity() const noexcept; simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). @@ -49531,7 +80369,7 @@ class parser { * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. */ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -49554,8 +80392,12 @@ class parser { * behavior of the parser for future operations. */ bool threaded{true}; + #else + /** + * When SIMDJSON_THREADS_ENABLED is not defined, the parser instance cannot use threads. + */ + bool threaded{false}; #endif - /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. * The result must be valid UTF-8. @@ -49603,9 +80445,20 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * the string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and usafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ - std::unique_ptr implementation{}; + std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; @@ -49619,15 +80472,15 @@ class parser { }; } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public haswell::implementation_simdjson_result_base { +struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(haswell::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(ppc64::ondemand::parser &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; @@ -49635,11 +80488,11 @@ struct simdjson_result : public haswell::implementati } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H -/* end file simdjson/generic/ondemand/parser.h for haswell */ +/* end file simdjson/generic/ondemand/parser.h for ppc64 */ // All other declarations -/* including simdjson/generic/ondemand/array.h for haswell: #include "simdjson/generic/ondemand/array.h" */ -/* begin file simdjson/generic/ondemand/array.h for haswell */ +/* including simdjson/generic/ondemand/array.h for ppc64: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -49650,7 +80503,7 @@ struct simdjson_result : public haswell::implementati /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { /** @@ -49686,7 +80539,8 @@ class array { * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer - * safe to continue. + * safe to continue. Note that count_elements() does not validate the JSON values, + * only the structure of the array. * * To check that an array is empty, it is more performant to use * the is_empty() method. @@ -49742,6 +80596,22 @@ class array { * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. @@ -49813,25 +80683,26 @@ class array { }; } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public haswell::implementation_simdjson_result_base { +struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(haswell::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(ppc64::ondemand::array &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; - simdjson_inline simdjson_result begin() noexcept; - simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; inline simdjson_result count_elements() & noexcept; inline simdjson_result is_empty() & noexcept; inline simdjson_result reset() & noexcept; - simdjson_inline simdjson_result at(size_t index) noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; }; @@ -49839,9 +80710,9 @@ struct simdjson_result : public haswell::implementatio } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H -/* end file simdjson/generic/ondemand/array.h for haswell */ -/* including simdjson/generic/ondemand/array_iterator.h for haswell: #include "simdjson/generic/ondemand/array_iterator.h" */ -/* begin file simdjson/generic/ondemand/array_iterator.h for haswell */ +/* end file simdjson/generic/ondemand/array.h for ppc64 */ +/* including simdjson/generic/ondemand/array_iterator.h for ppc64: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -49853,7 +80724,7 @@ struct simdjson_result : public haswell::implementatio namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { /** @@ -49913,15 +80784,15 @@ class array_iterator { }; } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public haswell::implementation_simdjson_result_base { +struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(haswell::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(ppc64::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -49929,28 +80800,31 @@ struct simdjson_result : public haswell::impl // Iterator interface // - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. - simdjson_inline bool operator==(const simdjson_result &) const noexcept; - simdjson_inline bool operator!=(const simdjson_result &) const noexcept; - simdjson_inline simdjson_result &operator++() noexcept; + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H -/* end file simdjson/generic/ondemand/array_iterator.h for haswell */ -/* including simdjson/generic/ondemand/document.h for haswell: #include "simdjson/generic/ondemand/document.h" */ -/* begin file simdjson/generic/ondemand/document.h for haswell */ +/* end file simdjson/generic/ondemand/array_iterator.h for ppc64 */ +/* including simdjson/generic/ondemand/document.h for ppc64: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { /** @@ -50121,22 +80995,39 @@ class document { * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ - template simdjson_inline simdjson_result get() & noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); + template + simdjson_inline simdjson_result get() & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "Cannot initialize the specified type."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; } - /** @overload template simdjson_result get() & noexcept */ - template simdjson_inline simdjson_result get() && noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); + /** + * @overload template simdjson_result get() & noexcept + * + * We disallow the use tag_invoke CPO on a moved document; it may create UB + * if user uses `ondemand::array` or `ondemand::object` in their custom type. + * + * The member function is still remains specialize-able for compatibility + * reasons, but we completely disallow its use when a tag_invoke customization + * is provided. + */ + template + simdjson_inline simdjson_result get() && +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(!std::is_same::value && !std::is_same::value, "You should never hold either an ondemand::array or ondemand::object without a corresponding ondemand::document being alive; that would be Undefined Behaviour."); + return static_cast(*this).get(); } /** @@ -50150,11 +81041,57 @@ class document { * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ - template simdjson_inline error_code get(T &out) & noexcept; + template + simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { +#if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + } + /** @overload template error_code get(T &out) & noexcept */ - template simdjson_inline error_code get(T &out) && noexcept; + template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.) + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() & noexcept(false); + template + explicit simdjson_deprecated simdjson_inline operator T() && noexcept(false); + /** * Cast this JSON value to an array. * @@ -50238,7 +81175,8 @@ class document { * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer - * safe to continue. + * safe to continue. Note that count_elements() does not validate the JSON values, + * only the structure of the array. */ simdjson_inline simdjson_result count_elements() & noexcept; /** @@ -50327,7 +81265,7 @@ class document { * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field was not there when they are not in order). * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. E.g., the array @@ -50350,6 +81288,7 @@ class document { simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; /** * Get the type of this JSON value. It does not validate or consume the value. @@ -50373,6 +81312,14 @@ class document { */ simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the document is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + /** * Checks whether the document is a negative number. * @@ -50400,9 +81347,11 @@ class document { * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have - * an integer greater or equal to 9223372036854775808 + * an integer greater or equal to 9223372036854775808 and no larger than 18446744073709551615. * get_number_type() is number_type::signed_integer if we have an - * integer that is less than 9223372036854775808 + * integer that is less than 9223372036854775808 and greater or equal to -9223372036854775808. + * get_number_type() is number_type::big_integer if we have an integer outside + * of those ranges (either larger than 18446744073709551615 or smaller than -9223372036854775808). * Otherwise, get_number_type() has value number_type::floating_point_number * * This function requires processing the number string, but it is expected @@ -50520,6 +81469,14 @@ class document { * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/\\u00E9") == 123 + * doc.at_pointer((const char*)u8"/\u00E9") returns an error (NO_SUCH_FIELD) + * * Note that at_pointer() automatically calls rewind between each call. Thus * all values, objects and arrays that you have created so far (including unescaped strings) * are invalidated. After calling at_pointer, you need to consume the result: string values @@ -50536,6 +81493,30 @@ class document { * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_path(".\\u00E9") == 123 + * doc.at_path((const char*)u8".\u00E9") returns an error (NO_SUCH_FIELD) + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing @@ -50576,6 +81557,11 @@ class document { /** * A document_reference is a thin wrapper around a document reference instance. + * The document_reference instances are used primarily/solely for streams of JSON + * documents. They differ from document instances when parsing a scalar value + * (a document that is not an array or an object). In the case of a document, + * we expect the document to be fully consumed. In the case of a document_reference, + * we allow trailing content. */ class document_reference { public: @@ -50601,10 +81587,82 @@ class document_reference { simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; + template + simdjson_inline simdjson_result get() & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "Cannot initialize the specified type."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + template + simdjson_inline simdjson_result get() && +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(!std::is_same::value && !std::is_same::value, "You should never hold either an ondemand::array or ondemand::object without a corresponding ondemand::document_reference being alive; that would be Undefined Behaviour."); + return static_cast(*this).get(); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { +#if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + } + + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; simdjson_inline operator document&() const noexcept; - #if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator array() & noexcept(false); simdjson_inline operator object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); @@ -50624,11 +81682,13 @@ class document_reference { simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; @@ -50638,25 +81698,27 @@ class document_reference { simdjson_inline simdjson_result get_number() noexcept; simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + private: document *doc{nullptr}; }; } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public haswell::implementation_simdjson_result_base { +struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(haswell::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(ppc64::ondemand::document &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; - simdjson_inline simdjson_result get_array() & noexcept; - simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; @@ -50667,52 +81729,56 @@ struct simdjson_result : public haswell::implementa template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; - simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; - simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & noexcept; - template simdjson_inline simdjson_result get() && noexcept; + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; - #if SIMDJSON_EXCEPTIONS - simdjson_inline operator haswell::ondemand::array() & noexcept(false); - simdjson_inline operator haswell::ondemand::object() & noexcept(false); + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator ppc64::ondemand::array() & noexcept(false); + simdjson_inline operator ppc64::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); - simdjson_inline operator haswell::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator ppc64::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); - simdjson_inline operator haswell::ondemand::value() noexcept(false); + simdjson_inline operator ppc64::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; - simdjson_inline simdjson_result at(size_t index) & noexcept; - simdjson_inline simdjson_result begin() & noexcept; - simdjson_inline simdjson_result end() & noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field(const char *key) & noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_inline simdjson_result operator[](const char *key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; - simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; simdjson_inline bool at_end() const noexcept; simdjson_inline bool is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; - simdjson_inline simdjson_result get_number_type() noexcept; - simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; }; @@ -50723,14 +81789,14 @@ struct simdjson_result : public haswell::implementa namespace simdjson { template<> -struct simdjson_result : public haswell::implementation_simdjson_result_base { +struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(haswell::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result(ppc64::ondemand::document_reference value, error_code error) noexcept; simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; - simdjson_inline simdjson_result get_array() & noexcept; - simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; @@ -50741,54 +81807,64 @@ struct simdjson_result : public haswell:: template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; - simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; - simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_inline operator haswell::ondemand::array() & noexcept(false); - simdjson_inline operator haswell::ondemand::object() & noexcept(false); + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator ppc64::ondemand::array() & noexcept(false); + simdjson_inline operator ppc64::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); - simdjson_inline operator haswell::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator ppc64::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); - simdjson_inline operator haswell::ondemand::value() noexcept(false); + simdjson_inline operator ppc64::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; - simdjson_inline simdjson_result at(size_t index) & noexcept; - simdjson_inline simdjson_result begin() & noexcept; - simdjson_inline simdjson_result end() & noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field(const char *key) & noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_inline simdjson_result operator[](const char *key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; - simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; - simdjson_inline simdjson_result get_number_type() noexcept; - simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H -/* end file simdjson/generic/ondemand/document.h for haswell */ -/* including simdjson/generic/ondemand/document_stream.h for haswell: #include "simdjson/generic/ondemand/document_stream.h" */ -/* begin file simdjson/generic/ondemand/document_stream.h for haswell */ +/* end file simdjson/generic/ondemand/document.h for ppc64 */ +/* including simdjson/generic/ondemand/document_stream.h for ppc64: #include "simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -50806,7 +81882,7 @@ struct simdjson_result : public haswell:: #endif namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED @@ -50913,10 +81989,9 @@ class document_stream { class iterator { public: using value_type = simdjson_result; - using reference = value_type; - + using reference = simdjson_result; + using pointer = void; using difference_type = std::ptrdiff_t; - using iterator_category = std::input_iterator_tag; /** @@ -50926,7 +82001,7 @@ class document_stream { /** * Get the current document (or error). */ - simdjson_inline simdjson_result operator*() noexcept; + simdjson_inline reference operator*() noexcept; /** * Advance to the next document (prefix). */ @@ -51108,18 +82183,18 @@ class document_stream { friend class document; friend class json_iterator; friend struct simdjson_result; - friend struct internal::simdjson_result_base; + friend struct simdjson::internal::simdjson_result_base; }; // document_stream } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public haswell::implementation_simdjson_result_base { +struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(haswell::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(ppc64::ondemand::document_stream &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; @@ -51127,9 +82202,9 @@ struct simdjson_result : public haswell::imp } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H -/* end file simdjson/generic/ondemand/document_stream.h for haswell */ -/* including simdjson/generic/ondemand/field.h for haswell: #include "simdjson/generic/ondemand/field.h" */ -/* begin file simdjson/generic/ondemand/field.h for haswell */ +/* end file simdjson/generic/ondemand/document_stream.h for ppc64 */ +/* including simdjson/generic/ondemand/field.h for ppc64: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -51141,7 +82216,7 @@ struct simdjson_result : public haswell::imp /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { /** @@ -51168,12 +82243,39 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; + /** + * Get the unprocessed key as a string_view. This includes the quotes and may include + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). + */ + simdjson_inline std::string_view key_raw_json_token() const noexcept; + /** + * Get the key as a string_view. This does not include the quotes and + * the string is unprocessed key so it may contain escape characters + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. + */ + simdjson_inline std::string_view escaped_key() const noexcept; /** * Get the field value. */ @@ -51192,29 +82294,33 @@ class field : public std::pair { }; } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public haswell::implementation_simdjson_result_base { +struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(haswell::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(ppc64::ondemand::field &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; - simdjson_inline simdjson_result key() noexcept; - simdjson_inline simdjson_result value() noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result key_raw_json_token() noexcept; + simdjson_inline simdjson_result escaped_key() noexcept; + simdjson_inline simdjson_result value() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H -/* end file simdjson/generic/ondemand/field.h for haswell */ -/* including simdjson/generic/ondemand/object.h for haswell: #include "simdjson/generic/ondemand/object.h" */ -/* begin file simdjson/generic/ondemand/object.h for haswell */ +/* end file simdjson/generic/ondemand/field.h for ppc64 */ +/* including simdjson/generic/ondemand/object.h for ppc64: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -51225,7 +82331,7 @@ struct simdjson_result : public haswell::implementatio /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { /** @@ -51273,6 +82379,8 @@ class object { * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() * is an error. * + * If you expect to have keys with escape characters, please review our documentation. + * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ @@ -51294,7 +82402,7 @@ class object { * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field was not there when they are not in order). * * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. @@ -51310,6 +82418,8 @@ class object { * You are expected to access keys only once. You should access the value corresponding to a key * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. * + * If you expect to have keys with escape characters, please review our documentation. + * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ @@ -51357,11 +82467,24 @@ class object { */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -51423,27 +82546,29 @@ class object { }; } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public haswell::implementation_simdjson_result_base { +struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(haswell::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(ppc64::ondemand::object &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; - simdjson_inline simdjson_result begin() noexcept; - simdjson_inline simdjson_result end() noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; @@ -51454,9 +82579,9 @@ struct simdjson_result : public haswell::implementati } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H -/* end file simdjson/generic/ondemand/object.h for haswell */ -/* including simdjson/generic/ondemand/object_iterator.h for haswell: #include "simdjson/generic/ondemand/object_iterator.h" */ -/* begin file simdjson/generic/ondemand/object_iterator.h for haswell */ +/* end file simdjson/generic/ondemand/object.h for ppc64 */ +/* including simdjson/generic/ondemand/object_iterator.h for ppc64: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -51467,7 +82592,7 @@ struct simdjson_result : public haswell::implementati /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { class object_iterator { @@ -51508,15 +82633,15 @@ class object_iterator { }; } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public haswell::implementation_simdjson_result_base { +struct simdjson_result : public ppc64::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(haswell::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(ppc64::ondemand::object_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -51525,21 +82650,21 @@ struct simdjson_result : public haswell::imp // // Reads key and value, yielding them to the user. - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. // Assumes it's being compared with the end. true if depth < iter->depth. - simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator==(const simdjson_result &) const noexcept; // Assumes it's being compared with the end. true if depth >= iter->depth. - simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; // Checks for ']' and ',' - simdjson_inline simdjson_result &operator++() noexcept; + simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H -/* end file simdjson/generic/ondemand/object_iterator.h for haswell */ -/* including simdjson/generic/ondemand/serialization.h for haswell: #include "simdjson/generic/ondemand/serialization.h" */ -/* begin file simdjson/generic/ondemand/serialization.h for haswell */ +/* end file simdjson/generic/ondemand/object_iterator.h for ppc64 */ +/* including simdjson/generic/ondemand/serialization.h for ppc64: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -51553,30 +82678,30 @@ namespace simdjson { * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ -inline simdjson_result to_json_string(haswell::ondemand::document& x) noexcept; +inline simdjson_result to_json_string(ppc64::ondemand::document& x) noexcept; /** * Create a string-view instance out of a value instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. The value must * not have been accessed previously. It does not * validate the content. */ -inline simdjson_result to_json_string(haswell::ondemand::value& x) noexcept; +inline simdjson_result to_json_string(ppc64::ondemand::value& x) noexcept; /** * Create a string-view instance out of an object instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ -inline simdjson_result to_json_string(haswell::ondemand::object& x) noexcept; +inline simdjson_result to_json_string(ppc64::ondemand::object& x) noexcept; /** * Create a string-view instance out of an array instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ -inline simdjson_result to_json_string(haswell::ondemand::array& x) noexcept; -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(ppc64::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); } // namespace simdjson /** @@ -51586,7 +82711,7 @@ inline simdjson_result to_json_string(simdjson_result x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. It does not @@ -51608,9 +82733,9 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. It does not @@ -51620,13 +82745,13 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); #endif -inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document_reference& value); +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document_reference& value); #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); #endif /** * Print JSON to an output stream. It does not @@ -51636,22 +82761,237 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif -}}} // namespace simdjson::haswell::ondemand +}}} // namespace simdjson::ppc64::ondemand #endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H -/* end file simdjson/generic/ondemand/serialization.h for haswell */ +/* end file simdjson/generic/ondemand/serialization.h for ppc64 */ + +// Deserialization for standard types +/* including simdjson/generic/ondemand/std_deserialize.h for ppc64: #include "simdjson/generic/ondemand/std_deserialize.h" */ +/* begin file simdjson/generic/ondemand/std_deserialize.h for ppc64 */ +#if SIMDJSON_SUPPORTS_DESERIALIZATION + +#ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_ONDEMAND_DESERIALIZE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +template +constexpr bool require_custom_serialization = false; + +////////////////////////////// +// Number deserialization +////////////////////////////// + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + using limits = std::numeric_limits; + + uint64_t x; + SIMDJSON_TRY(val.get_uint64().get(x)); + if (x > (limits::max)()) { + return NUMBER_OUT_OF_RANGE; + } + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + double x; + SIMDJSON_TRY(val.get_double().get(x)); + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + using limits = std::numeric_limits; + + int64_t x; + SIMDJSON_TRY(val.get_int64().get(x)); + if (x > (limits::max)() || x < (limits::min)()) { + return NUMBER_OUT_OF_RANGE; + } + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + +/** + * STL containers have several constructors including one that takes a single + * size argument. Thus, some compilers (Visual Studio) will not be able to + * disambiguate between the size and container constructor. Users should + * explicitly specify the type of the container as needed: e.g., + * doc.get>(). + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::value_type; + static_assert( + deserializable, + "The specified type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the container must default constructible."); + + ppc64::ondemand::array arr; + SIMDJSON_TRY(val.get_array().get(arr)); + for (auto v : arr) { + if constexpr (concepts::returns_reference) { + if (auto const err = v.get().get(concepts::emplace_one(out)); + err) { + // If an error occurs, the empty element that we just inserted gets + // removed. We're not using a temp variable because if T is a heavy + // type, we want the valid path to be the fast path and the slow path be + // the path that has errors in it. + if constexpr (requires { out.pop_back(); }) { + static_cast(out.pop_back()); + } + return err; + } + } else { + value_type temp; + if (auto const err = v.get().get(temp); err) { + return err; + } + concepts::emplace_one(out, std::move(temp)); + } + } + return SUCCESS; +} + + +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + ppc64::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + + + + +/** + * This CPO (Customization Point Object) will help deserialize into + * smart pointers. + * + * If constructing T is nothrow, this conversion should be nothrow as well since + * we return MEMALLOC if we're not able to allocate memory instead of throwing + * the error message. + * + * @tparam T The type inside the smart pointer + * @tparam ValT document/value type + * @param val document/value + * @param out a reference to the smart pointer + * @return status of the conversion + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::element_type, ValT>) { + using element_type = typename std::remove_cvref_t::element_type; + + // For better error messages, don't use these as constraints on + // the tag_invoke CPO. + static_assert( + deserializable, + "The specified type inside the unique_ptr must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the unique_ptr must default constructible."); + + auto ptr = new (std::nothrow) element_type(); + if (ptr == nullptr) { + return MEMALLOC; + } + SIMDJSON_TRY(val.template get(*ptr)); + out.reset(ptr); + return SUCCESS; +} + +/** + * This CPO (Customization Point Object) will help deserialize into optional types. + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { + using value_type = typename std::remove_cvref_t::value_type; + + static_assert( + deserializable, + "The specified type inside the unique_ptr must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the unique_ptr must default constructible."); + + if (!out) { + out.emplace(); + } + SIMDJSON_TRY(val.template get(out.value())); + return SUCCESS; +} + +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_DESERIALIZE_H +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +/* end file simdjson/generic/ondemand/std_deserialize.h for ppc64 */ // Inline definitions -/* including simdjson/generic/ondemand/array-inl.h for haswell: #include "simdjson/generic/ondemand/array-inl.h" */ -/* begin file simdjson/generic/ondemand/array-inl.h for haswell */ +/* including simdjson/generic/ondemand/array-inl.h for ppc64: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/jsonpathutil.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ @@ -51661,7 +83001,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result array::start(value_iterator &iter) noexcept { - // We don't need to know if the array is empty to start iteration, but we do want to know if there - // is an error--thus `simdjson_unused`. - simdjson_unused bool has_value; - SIMDJSON_TRY( iter.start_array().get(has_value) ); - return array(iter); +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. + iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline simdjson_result array::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/array_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. + if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::array_iterator &&value +) noexcept + : ppc64::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : ppc64::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/value-inl.h for ppc64: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result value::get_object() noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +template +simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { + return iter.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} + +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + + +template<> simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } +template<> simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } +template<> simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } +template<> simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } +template<> simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } +template<> simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } +template<> simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } +template<> simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } +template<> simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline value::operator T() noexcept(false) { + return get(); +} +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); } -simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { - simdjson_unused bool has_value; - SIMDJSON_TRY( iter.start_root_array().get(has_value) ); - return array(iter); +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); } -simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { - bool has_value; - SIMDJSON_TRY(iter.started_array().get(has_value)); - return array(iter); + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); } -simdjson_inline simdjson_result array::begin() noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - return array_iterator(iter); +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; } -simdjson_inline simdjson_result array::end() noexcept { - return array_iterator(iter); +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; } -simdjson_inline error_code array::consume() noexcept { - auto error = iter.json_iter().skip_child(iter.depth()-1); - if(error) { iter.abandon(); } - return error; + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); } -simdjson_inline simdjson_result array::raw_json() noexcept { - const uint8_t * starting_point{iter.peek_start()}; - auto error = consume(); +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); if(error) { return error; } - // After 'consume()', we could be left pointing just beyond the document, but that - // is ok because we are not going to dereference the final pointer position, we just - // use it to compute the length in bytes. - const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); + return ! ((this_type == json_type::array) || (this_type == json_type::object)); } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline simdjson_result array::count_elements() & noexcept { - size_t count{0}; - // Important: we do not consume any of the values. - for(simdjson_unused auto v : *this) { count++; } - // The above loop will always succeed, but we want to report errors. - if(iter.error()) { return iter.error(); } - // We need to move back at the start because we expect users to iterate through - // the array after counting the number of elements. - iter.reset_array(); - return count; +simdjson_inline simdjson_result value::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); } -SIMDJSON_POP_DISABLE_WARNINGS -simdjson_inline simdjson_result array::is_empty() & noexcept { - bool is_not_empty; - auto error = iter.reset_array().get(is_not_empty); - if(error) { return error; } - return !is_not_empty; + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); } -inline simdjson_result array::reset() & noexcept { - return iter.reset_array(); +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); } -inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { - if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } - json_pointer = json_pointer.substr(1); - // - means "the append position" or "the element after the end of the array" - // We don't support this, because we're returning a real element, not a position. - if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} - // Read the array index - size_t array_index = 0; - size_t i; - for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { - uint8_t digit = uint8_t(json_pointer[i] - '0'); - // Check for non-digit in array index. If it's there, we're trying to get a field in an object - if (digit > 9) { return INCORRECT_TYPE; } - array_index = array_index*10 + digit; +simdjson_inline simdjson_result value::raw_json() noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: { + ondemand::array array; + SIMDJSON_TRY(get_array().get(array)); + return array.raw_json(); + } + case json_type::object: { + ondemand::object object; + SIMDJSON_TRY(get_object().get(object)); + return object.raw_json(); + } + default: + return raw_json_token(); } +} - // 0 followed by other digits is invalid - if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} - // Empty string is invalid; so is a "/" with no digits before it - if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" - // Get the child - auto child = at(array_index); - // If there is an error, it ends here - if(child.error()) { - return child; +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + return false; } + return true; +} - // If there is a /, we're not done yet, call recursively. - if (i < json_pointer.length()) { - child = child.at_pointer(json_pointer.substr(i)); +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } + return INVALID_JSON_POINTER; } - return child; } -simdjson_inline simdjson_result array::at(size_t index) noexcept { - size_t i = 0; - for (auto value : *this) { - if (i == index) { return value; } - i++; +simdjson_inline simdjson_result value::at_path(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; } - return INDEX_OUT_OF_BOUNDS; } } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - haswell::ondemand::array &&value -) noexcept - : implementation_simdjson_result_base( - std::forward(value) +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) ) { } -simdjson_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error -) noexcept - : implementation_simdjson_result_base(error) +) noexcept : + implementation_simdjson_result_base(error) { } - -simdjson_inline simdjson_result simdjson_result::begin() noexcept { +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { if (error()) { return error(); } - return first.end(); + return {}; } -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { if (error()) { return error(); } - return first.count_elements(); + return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { if (error()) { return error(); } - return first.is_empty(); + return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { if (error()) { return error(); } - return first.at(index); + return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { if (error()) { return error(); } - return first.at_pointer(json_pointer); + return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { if (error()) { return error(); } - return first.raw_json(); + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; } -} // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H -/* end file simdjson/generic/ondemand/array-inl.h for haswell */ -/* including simdjson/generic/ondemand/array_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/array_iterator-inl.h for haswell */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +template<> simdjson_inline error_code simdjson_result::get(ppc64::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} -namespace simdjson { -namespace haswell { -namespace ondemand { +template simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} -simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept - : iter{_iter} -{} +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} -simdjson_inline simdjson_result array_iterator::operator*() noexcept { - if (iter.error()) { iter.abandon(); return iter.error(); } - return value(iter.child()); +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); } -simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { - return !(*this != other); +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); } -simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { - return iter.is_open(); +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); } -simdjson_inline array_iterator &array_iterator::operator++() noexcept { - error_code error; - // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. - // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. - if (( error = iter.error() )) { return *this; } - if (( error = iter.skip_child() )) { return *this; } - if (( error = iter.has_next_element().error() )) { return *this; } - return *this; +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); } +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator ppc64::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif -} // namespace ondemand -} // namespace haswell -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result( - haswell::ondemand::array_iterator &&value -) noexcept - : haswell::implementation_simdjson_result_base(std::forward(value)) -{ - first.iter.assert_is_valid(); +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); } -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : haswell::implementation_simdjson_result_base({}, error) -{ + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); } -simdjson_inline simdjson_result simdjson_result::operator*() noexcept { +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } - return *first; + return first.current_location(); } -simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return !error(); } - return first == other.first; + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); } -simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return error(); } - return first != other.first; + +simdjson_inline simdjson_result simdjson_result::at_pointer( + std::string_view json_pointer) noexcept { + if (error()) { + return error(); + } + return first.at_pointer(json_pointer); } -simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { - // Clear the error if there is one, so we don't yield it twice - if (error()) { second = SUCCESS; return *this; } - ++(first); - return *this; + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); } } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/array_iterator-inl.h for haswell */ -/* including simdjson/generic/ondemand/document-inl.h for haswell: #include "simdjson/generic/ondemand/document-inl.h" */ -/* begin file simdjson/generic/ondemand/document-inl.h for haswell */ +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/document-inl.h for ppc64: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept @@ -52125,22 +84031,29 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } - -template simdjson_inline error_code document::get(T &out) & noexcept { - return get().get(out); -} -template simdjson_inline error_code document::get(T &out) && noexcept { - return std::forward(*this).get().get(out); -} +template<> simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } +template<> simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } +template<> simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } +template<> simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } +template<> simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } +template<> simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } +template<> simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } +template<> simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } +template<> simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } + +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } #if SIMDJSON_EXCEPTIONS +template +simdjson_deprecated simdjson_inline document::operator T() && noexcept(false) { return get(); } +template +simdjson_inline document::operator T() & noexcept(false) { return get(); } simdjson_inline document::operator array() & noexcept(false) { return get_array(); } simdjson_inline document::operator object() & noexcept(false) { return get_object(); } simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } @@ -52197,7 +84110,14 @@ simdjson_inline simdjson_result document::operator[](const char *key) & n } simdjson_inline error_code document::consume() noexcept { - auto error = iter.skip_child(0); + bool scalar = false; + auto error = is_scalar().get(scalar); + if(error) { return error; } + if(scalar) { + iter.return_current_and_advance(); + return SUCCESS; + } + error = iter.skip_child(0); if(error) { iter.abandon(); } return error; } @@ -52219,12 +84139,21 @@ simdjson_inline simdjson_result document::type() noexcept { } simdjson_inline simdjson_result document::is_scalar() noexcept { + // For more speed, we could do: + // return iter.is_single_token(); json_type this_type; auto error = type().get(this_type); if(error) { return error; } return ! ((this_type == json_type::array) || (this_type == json_type::object)); } +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + simdjson_inline bool document::is_negative() noexcept { return get_root_value_iterator().is_root_negative(); } @@ -52244,7 +84173,7 @@ simdjson_inline simdjson_result document::get_number() noexcept { simdjson_inline simdjson_result document::raw_json_token() noexcept { auto _iter = get_root_value_iterator(); - return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); } simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { @@ -52265,275 +84194,305 @@ simdjson_inline simdjson_result document::at_pointer(std::string_view jso } } +simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - haswell::ondemand::document &&value +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::document &&value ) noexcept : - implementation_simdjson_result_base( - std::forward(value) + implementation_simdjson_result_base( + std::forward(value) ) { } -simdjson_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : - implementation_simdjson_result_base( + implementation_simdjson_result_base( error ) { } -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_inline error_code simdjson_result::rewind() noexcept { +simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { +simdjson_inline simdjson_result simdjson_result::get() & noexcept { if (error()) { return error(); } return first.get(); } template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } - return std::forward(first).get(); + return std::forward(first).get(); } template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { if (error()) { return error(); } return first.get(out); } template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { if (error()) { return error(); } - return std::forward(first).get(out); + return std::forward(first).get(out); } -template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } - return std::forward(first); + return std::forward(first); } -template<> simdjson_inline error_code simdjson_result::get(haswell::ondemand::document &out) & noexcept = delete; -template<> simdjson_inline error_code simdjson_result::get(haswell::ondemand::document &out) && noexcept { +template<> simdjson_inline error_code simdjson_result::get(ppc64::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code simdjson_result::get(ppc64::ondemand::document &out) && noexcept { if (error()) { return error(); } - out = std::forward(first); + out = std::forward(first); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::type() noexcept { +simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} -simdjson_inline bool simdjson_result::is_negative() noexcept { +simdjson_inline bool simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS -simdjson_inline simdjson_result::operator haswell::ondemand::array() & noexcept(false) { +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator haswell::ondemand::object() & noexcept(false) { +simdjson_inline simdjson_result::operator ppc64::ondemand::array() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { +simdjson_inline simdjson_result::operator ppc64::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator double() noexcept(false) { +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { +simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator haswell::ondemand::raw_json_string() noexcept(false) { +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator bool() noexcept(false) { +simdjson_inline simdjson_result::operator ppc64::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator haswell::ondemand::value() noexcept(false) { +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } -simdjson_inline bool simdjson_result::at_end() const noexcept { +simdjson_inline bool simdjson_result::at_end() const noexcept { if (error()) { return error(); } return first.at_end(); } -simdjson_inline int32_t simdjson_result::current_depth() const noexcept { +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { if (error()) { return error(); } return first.current_depth(); } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} } // namespace simdjson namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} @@ -52569,15 +84528,25 @@ simdjson_inline simdjson_result document_reference::get_raw_jso simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } - +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } #if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } -simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } #endif @@ -52594,6 +84563,7 @@ simdjson_inline simdjson_result document_reference::find_field_unordered( simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } @@ -52602,214 +84572,264 @@ simdjson_inline simdjson_result document_reference::get_number_type simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} simdjson_inline document_reference::operator document&() const noexcept { return *doc; } } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::document_reference value, error_code error) - noexcept : implementation_simdjson_result_base(std::forward(value), error) {} +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_inline error_code simdjson_result::rewind() noexcept { +simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } -simdjson_inline simdjson_result simdjson_result::type() noexcept { +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } -simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +template <> +simdjson_inline error_code simdjson_result::get(ppc64::ondemand::document_reference &out) & noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +template <> +simdjson_inline error_code simdjson_result::get(ppc64::ondemand::document_reference &out) && noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS -simdjson_inline simdjson_result::operator haswell::ondemand::array() & noexcept(false) { +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator ppc64::ondemand::array() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator haswell::ondemand::object() & noexcept(false) { +simdjson_inline simdjson_result::operator ppc64::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator double() noexcept(false) { +simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator haswell::ondemand::raw_json_string() noexcept(false) { +simdjson_inline simdjson_result::operator ppc64::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator bool() noexcept(false) { +simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator haswell::ondemand::value() noexcept(false) { +simdjson_inline simdjson_result::operator ppc64::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H -/* end file simdjson/generic/ondemand/document-inl.h for haswell */ -/* including simdjson/generic/ondemand/document_stream-inl.h for haswell: #include "simdjson/generic/ondemand/document_stream-inl.h" */ -/* begin file simdjson/generic/ondemand/document_stream-inl.h for haswell */ +/* end file simdjson/generic/ondemand/document-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/document_stream-inl.h for ppc64: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -52824,7 +84844,7 @@ simdjson_inline simdjson_result simdjson_result namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED @@ -52964,7 +84984,6 @@ simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bo } simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { - //if(stream->error) { return stream->error; } return simdjson_result(stream->doc, stream->error); } @@ -53154,9 +85173,18 @@ simdjson_inline std::string_view document_stream::iterator::source() const noexc depth--; break; default: // Scalar value document - // TODO: Remove any trailing whitespaces + // TODO: We could remove trailing whitespaces // This returns a string spanning from start of value to the beginning of the next document (excluded) - return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } } cur_struct_index++; } @@ -53211,22 +85239,22 @@ inline void document_stream::start_stage1_thread() noexcept { #endif // SIMDJSON_THREADS_ENABLED } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : - implementation_simdjson_result_base(error) + implementation_simdjson_result_base(error) { } -simdjson_inline simdjson_result::simdjson_result( - haswell::ondemand::document_stream &&value +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::document_stream &&value ) noexcept : - implementation_simdjson_result_base( - std::forward(value) + implementation_simdjson_result_base( + std::forward(value) ) { } @@ -53234,9 +85262,9 @@ simdjson_inline simdjson_result::simdjson_re } #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H -/* end file simdjson/generic/ondemand/document_stream-inl.h for haswell */ -/* including simdjson/generic/ondemand/field-inl.h for haswell: #include "simdjson/generic/ondemand/field-inl.h" */ -/* begin file simdjson/generic/ondemand/field-inl.h for haswell */ +/* end file simdjson/generic/ondemand/document_stream-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/field-inl.h for ppc64: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -53248,10 +85276,10 @@ simdjson_inline simdjson_result::simdjson_re /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { -// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit +// clang 6 does not think the default constructor can be noexcept, so we make it explicit simdjson_inline field::field() noexcept : std::pair() {} simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept @@ -53277,11 +85305,32 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. return first; } + +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +} + +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); +} + simdjson_inline value &field::value() & noexcept { return second; } @@ -53291,35 +85340,53 @@ simdjson_inline value field::value() && noexcept { } } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - haswell::ondemand::field &&value +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::field &&value ) noexcept : - implementation_simdjson_result_base( - std::forward(value) + implementation_simdjson_result_base( + std::forward(value) ) { } -simdjson_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : - implementation_simdjson_result_base(error) + implementation_simdjson_result_base(error) { } -simdjson_inline simdjson_result simdjson_result::key() noexcept { +simdjson_inline simdjson_result simdjson_result::key() noexcept { if (error()) { return error(); } return first.key(); } -simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { + if (error()) { return error(); } + return first.key_raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { + if (error()) { return error(); } + return first.escaped_key(); +} + +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { if (error()) { return error(); } return first.unescaped_key(allow_replacement); } -simdjson_inline simdjson_result simdjson_result::value() noexcept { + +template +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + +simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); } @@ -53327,9 +85394,9 @@ simdjson_inline simdjson_result simdjson_result simdjson_resultimplementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + inline void json_iterator::rewind() noexcept { token.set_position( root_position() ); logger::log_headers(); // We start again @@ -53618,6 +85702,12 @@ simdjson_inline uint32_t json_iterator::peek_length(token_position position) con #endif // SIMDJSON_CHECK_EOF return token.peek_length(position); } +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); +} simdjson_inline token_position json_iterator::last_position() const noexcept { // The following line fails under some compilers... @@ -53663,11 +85753,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -53726,22 +85828,22 @@ simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const ui } } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::json_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/json_iterator-inl.h for haswell */ -/* including simdjson/generic/ondemand/json_type-inl.h for haswell: #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* begin file simdjson/generic/ondemand/json_type-inl.h for haswell */ +/* end file simdjson/generic/ondemand/json_iterator-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/json_type-inl.h for ppc64: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -53752,7 +85854,7 @@ simdjson_inline simdjson_result::simdjson_resu /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { @@ -53792,7 +85894,6 @@ simdjson_inline number::operator uint64_t() const noexcept { return get_uint64(); } - simdjson_inline bool number::is_int64() const noexcept { return get_number_type() == number_type::signed_integer; } @@ -53847,22 +85948,22 @@ simdjson_inline void number::skip_double() noexcept { } } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::json_type &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H -/* end file simdjson/generic/ondemand/json_type-inl.h for haswell */ -/* including simdjson/generic/ondemand/logger-inl.h for haswell: #include "simdjson/generic/ondemand/logger-inl.h" */ -/* begin file simdjson/generic/ondemand/logger-inl.h for haswell */ +/* end file simdjson/generic/ondemand/json_type-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/logger-inl.h for ppc64: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -53877,7 +85978,7 @@ simdjson_inline simdjson_result::simdjson_result(e #include namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { namespace logger { @@ -54084,13 +86185,13 @@ inline void log_line(const json_iterator &iter, token_position index, depth_t de } // namespace logger } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H -/* end file simdjson/generic/ondemand/logger-inl.h for haswell */ -/* including simdjson/generic/ondemand/object-inl.h for haswell: #include "simdjson/generic/ondemand/object-inl.h" */ -/* begin file simdjson/generic/ondemand/object-inl.h for haswell */ +/* end file simdjson/generic/ondemand/logger-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/object-inl.h for ppc64: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -54105,7 +86206,7 @@ inline void log_line(const json_iterator &iter, token_position index, depth_t de /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { @@ -54257,6 +86358,14 @@ inline simdjson_result object::at_pointer(std::string_view json_pointer) return child; } +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); +} + simdjson_inline simdjson_result object::count_fields() & noexcept { size_t count{0}; // Important: we do not consume any of the values. @@ -54281,79 +86390,87 @@ simdjson_inline simdjson_result object::reset() & noexcept { } } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::object &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -simdjson_inline simdjson_result simdjson_result::begin() noexcept { +simdjson_inline simdjson_result simdjson_result::begin() noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() noexcept { +simdjson_inline simdjson_result simdjson_result::end() noexcept { if (error()) { return error(); } return first.end(); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { if (error()) { return error(); } - return std::forward(first).find_field_unordered(key); + return std::forward(first).find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { if (error()) { return error(); } - return std::forward(first)[key]; + return std::forward(first)[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { if (error()) { return error(); } - return std::forward(first).find_field(key); + return std::forward(first).find_field(key); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } -inline simdjson_result simdjson_result::reset() noexcept { +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +inline simdjson_result simdjson_result::reset() noexcept { if (error()) { return error(); } return first.reset(); } -inline simdjson_result simdjson_result::is_empty() noexcept { +inline simdjson_result simdjson_result::is_empty() noexcept { if (error()) { return error(); } return first.is_empty(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H -/* end file simdjson/generic/ondemand/object-inl.h for haswell */ -/* including simdjson/generic/ondemand/object_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/object_iterator-inl.h for haswell */ +/* end file simdjson/generic/ondemand/object-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -54365,7 +86482,7 @@ simdjson_inline simdjson_result simdjson_result::simdjson_result( - haswell::ondemand::object_iterator &&value +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::object_iterator &&value ) noexcept - : implementation_simdjson_result_base(std::forward(value)) + : implementation_simdjson_result_base(std::forward(value)) { first.iter.assert_is_valid(); } -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base({}, error) +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) { } -simdjson_inline simdjson_result simdjson_result::operator*() noexcept { +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { if (error()) { return error(); } return *first; } // If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return !error(); } return first == other.first; } // If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return error(); } return first != other.first; } // Checks for ']' and ',' -simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { // Clear the error if there is one, so we don't yield it twice if (error()) { second = SUCCESS; return *this; } ++first; @@ -54492,9 +86609,9 @@ simdjson_inline simdjson_result &simdjson_re } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/object_iterator-inl.h for haswell */ -/* including simdjson/generic/ondemand/parser-inl.h for haswell: #include "simdjson/generic/ondemand/parser-inl.h" */ -/* begin file simdjson/generic/ondemand/parser-inl.h for haswell */ +/* end file simdjson/generic/ondemand/object_iterator-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/parser-inl.h for ppc64: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -54511,7 +86628,7 @@ simdjson_inline simdjson_result &simdjson_re /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { simdjson_inline parser::parser(size_t max_capacity) noexcept @@ -54539,6 +86656,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } @@ -54555,6 +86677,27 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p return document::start({ reinterpret_cast(json.data()), this }); } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } @@ -54626,13 +86769,13 @@ inline simdjson_result parser::iterate_many(const padded_string return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } -simdjson_inline size_t parser::capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } -simdjson_inline size_t parser::max_capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } -simdjson_inline size_t parser::max_depth() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { return _max_depth; } @@ -54661,22 +86804,22 @@ simdjson_inline simdjson_warn_unused simdjson_result parser::u } } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::parser &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H -/* end file simdjson/generic/ondemand/parser-inl.h for haswell */ -/* including simdjson/generic/ondemand/raw_json_string-inl.h for haswell: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ -/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for haswell */ +/* end file simdjson/generic/ondemand/parser-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for ppc64: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -54689,7 +86832,7 @@ simdjson_inline simdjson_result::simdjson_result(erro namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} @@ -54699,36 +86842,39 @@ simdjson_inline const char * raw_json_string::raw() const noexcept { return rein simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;pos < target.size() && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;pos < target.size();pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;target[pos] && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;target[pos];pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } @@ -54740,7 +86886,7 @@ simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string } simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and + // Assumptions: does not contain unescaped quote characters("), and // the raw content is quote terminated within a valid JSON string. if(target.size() <= SIMDJSON_PADDING) { return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); @@ -54855,34 +87001,34 @@ simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, cons } } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::raw_json_string &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -simdjson_inline simdjson_result simdjson_result::raw() const noexcept { +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { if (error()) { return error(); } return first.raw(); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(haswell::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(ppc64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { if (error()) { return error(); } return first.unescape(iter, allow_replacement); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(haswell::ondemand::json_iterator &iter) const noexcept { +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(ppc64::ondemand::json_iterator &iter) const noexcept { if (error()) { return error(); } return first.unescape_wobbly(iter); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H -/* end file simdjson/generic/ondemand/raw_json_string-inl.h for haswell */ -/* including simdjson/generic/ondemand/serialization-inl.h for haswell: #include "simdjson/generic/ondemand/serialization-inl.h" */ -/* begin file simdjson/generic/ondemand/serialization-inl.h for haswell */ +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/serialization-inl.h for ppc64: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -54909,43 +87055,43 @@ inline std::string_view trim(const std::string_view str) noexcept { } -inline simdjson_result to_json_string(haswell::ondemand::document& x) noexcept { +inline simdjson_result to_json_string(ppc64::ondemand::document& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } -inline simdjson_result to_json_string(haswell::ondemand::document_reference& x) noexcept { +inline simdjson_result to_json_string(ppc64::ondemand::document_reference& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } -inline simdjson_result to_json_string(haswell::ondemand::value& x) noexcept { +inline simdjson_result to_json_string(ppc64::ondemand::value& x) noexcept { /** * If we somehow receive a value that has already been consumed, * then the following code could be in trouble. E.g., we create * an array as needed, but if an array was already created, then * it could be bad. */ - using namespace haswell::ondemand; - haswell::ondemand::json_type t; + using namespace ppc64::ondemand; + ppc64::ondemand::json_type t; auto error = x.type().get(t); if(error != SUCCESS) { return error; } switch (t) { case json_type::array: { - haswell::ondemand::array array; + ppc64::ondemand::array array; error = x.get_array().get(array); if(error) { return error; } return to_json_string(array); } case json_type::object: { - haswell::ondemand::object object; + ppc64::ondemand::object object; error = x.get_object().get(object); if(error) { return error; } return to_json_string(object); @@ -54955,50 +87101,50 @@ inline simdjson_result to_json_string(haswell::ondemand::value } } -inline simdjson_result to_json_string(haswell::ondemand::object& x) noexcept { +inline simdjson_result to_json_string(ppc64::ondemand::object& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } -inline simdjson_result to_json_string(haswell::ondemand::array& x) noexcept { +inline simdjson_result to_json_string(ppc64::ondemand::array& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } } // namespace simdjson -namespace simdjson { namespace haswell { namespace ondemand { +namespace simdjson { namespace ppc64 { namespace ondemand { #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::value x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::value x) { std::string_view v; auto error = simdjson::to_json_string(x).get(v); if(error == simdjson::SUCCESS) { @@ -55007,12 +87153,12 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand:: throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else -inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::value x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::value x) { std::string_view v; auto error = simdjson::to_json_string(x).get(v); if(error == simdjson::SUCCESS) { @@ -55024,7 +87170,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand:: #endif #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::array value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::array value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -55033,12 +87179,12 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand:: throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else -inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::array value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::array value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -55050,7 +87196,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand:: #endif #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document& value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -55059,7 +87205,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand:: throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document_reference& value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document_reference& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -55068,16 +87214,16 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand:: throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else -inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document& value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -55089,7 +87235,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand:: #endif #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::object value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::object value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -55098,12 +87244,12 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand:: throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else -inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::object value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::object value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -55113,12 +87259,12 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand:: } } #endif -}}} // namespace simdjson::haswell::ondemand +}}} // namespace simdjson::ppc64::ondemand #endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H -/* end file simdjson/generic/ondemand/serialization-inl.h for haswell */ -/* including simdjson/generic/ondemand/token_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/token_iterator-inl.h for haswell */ +/* end file simdjson/generic/ondemand/serialization-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -55129,7 +87275,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand:: /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { +namespace ppc64 { namespace ondemand { simdjson_inline token_iterator::token_iterator( @@ -55158,6 +87304,11 @@ simdjson_inline uint32_t token_iterator::peek_length(token_position position) co return *(position+1) - *position; } +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? + *(position+1) - *(position) + : *(position+2) - *(position); +} simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { return &buf[*(_position+delta)]; } @@ -55195,498 +87346,22 @@ simdjson_inline bool token_iterator::operator<=(const token_iterator &other) con } } // namespace ondemand -} // namespace haswell +} // namespace ppc64 } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::token_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/token_iterator-inl.h for haswell */ -/* including simdjson/generic/ondemand/value-inl.h for haswell: #include "simdjson/generic/ondemand/value-inl.h" */ -/* begin file simdjson/generic/ondemand/value-inl.h for haswell */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace haswell { -namespace ondemand { - -simdjson_inline value::value(const value_iterator &_iter) noexcept - : iter{_iter} -{ -} -simdjson_inline value value::start(const value_iterator &iter) noexcept { - return iter; -} -simdjson_inline value value::resume(const value_iterator &iter) noexcept { - return iter; -} - -simdjson_inline simdjson_result value::get_array() noexcept { - return array::start(iter); -} -simdjson_inline simdjson_result value::get_object() noexcept { - return object::start(iter); -} -simdjson_inline simdjson_result value::start_or_resume_object() noexcept { - if (iter.at_start()) { - return get_object(); - } else { - return object::resume(iter); - } -} - -simdjson_inline simdjson_result value::get_raw_json_string() noexcept { - return iter.get_raw_json_string(); -} -simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { - return iter.get_string(allow_replacement); -} -template -simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { - return iter.get_string(receiver, allow_replacement); -} -simdjson_inline simdjson_result value::get_wobbly_string() noexcept { - return iter.get_wobbly_string(); -} -simdjson_inline simdjson_result value::get_double() noexcept { - return iter.get_double(); -} -simdjson_inline simdjson_result value::get_double_in_string() noexcept { - return iter.get_double_in_string(); -} -simdjson_inline simdjson_result value::get_uint64() noexcept { - return iter.get_uint64(); -} -simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { - return iter.get_uint64_in_string(); -} -simdjson_inline simdjson_result value::get_int64() noexcept { - return iter.get_int64(); -} -simdjson_inline simdjson_result value::get_int64_in_string() noexcept { - return iter.get_int64_in_string(); -} -simdjson_inline simdjson_result value::get_bool() noexcept { - return iter.get_bool(); -} -simdjson_inline simdjson_result value::is_null() noexcept { - return iter.is_null(); -} -template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } - -template simdjson_inline error_code value::get(T &out) noexcept { - return get().get(out); -} - -#if SIMDJSON_EXCEPTIONS -simdjson_inline value::operator array() noexcept(false) { - return get_array(); -} -simdjson_inline value::operator object() noexcept(false) { - return get_object(); -} -simdjson_inline value::operator uint64_t() noexcept(false) { - return get_uint64(); -} -simdjson_inline value::operator int64_t() noexcept(false) { - return get_int64(); -} -simdjson_inline value::operator double() noexcept(false) { - return get_double(); -} -simdjson_inline value::operator std::string_view() noexcept(false) { - return get_string(false); -} -simdjson_inline value::operator raw_json_string() noexcept(false) { - return get_raw_json_string(); -} -simdjson_inline value::operator bool() noexcept(false) { - return get_bool(); -} -#endif - -simdjson_inline simdjson_result value::begin() & noexcept { - return get_array().begin(); -} -simdjson_inline simdjson_result value::end() & noexcept { - return {}; -} -simdjson_inline simdjson_result value::count_elements() & noexcept { - simdjson_result answer; - auto a = get_array(); - answer = a.count_elements(); - // count_elements leaves you pointing inside the array, at the first element. - // We need to move back so that the user can create a new array (which requires that - // we point at '['). - iter.move_at_start(); - return answer; -} -simdjson_inline simdjson_result value::count_fields() & noexcept { - simdjson_result answer; - auto a = get_object(); - answer = a.count_fields(); - iter.move_at_start(); - return answer; -} -simdjson_inline simdjson_result value::at(size_t index) noexcept { - auto a = get_array(); - return a.at(index); -} - -simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { - return start_or_resume_object().find_field(key); -} -simdjson_inline simdjson_result value::find_field(const char *key) noexcept { - return start_or_resume_object().find_field(key); -} - -simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { - return start_or_resume_object().find_field_unordered(key); -} -simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { - return start_or_resume_object().find_field_unordered(key); -} - -simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { - return start_or_resume_object()[key]; -} -simdjson_inline simdjson_result value::operator[](const char *key) noexcept { - return start_or_resume_object()[key]; -} - -simdjson_inline simdjson_result value::type() noexcept { - return iter.type(); -} - -simdjson_inline simdjson_result value::is_scalar() noexcept { - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return ! ((this_type == json_type::array) || (this_type == json_type::object)); -} - -simdjson_inline bool value::is_negative() noexcept { - return iter.is_negative(); -} - -simdjson_inline simdjson_result value::is_integer() noexcept { - return iter.is_integer(); -} -simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { - return iter.get_number_type(); -} -simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { - return iter.get_number(); -} - -simdjson_inline std::string_view value::raw_json_token() noexcept { - return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); -} - -simdjson_inline simdjson_result value::raw_json() noexcept { - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: { - ondemand::array array; - SIMDJSON_TRY(get_array().get(array)); - return array.raw_json(); - } - case json_type::object: { - ondemand::object object; - SIMDJSON_TRY(get_object().get(object)); - return object.raw_json(); - } - default: - return raw_json_token(); - } -} - -simdjson_inline simdjson_result value::current_location() noexcept { - return iter.json_iter().current_location(); -} - -simdjson_inline int32_t value::current_depth() const noexcept{ - return iter.json_iter().depth(); -} - -simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: - return (*this).get_array().at_pointer(json_pointer); - case json_type::object: - return (*this).get_object().at_pointer(json_pointer); - default: - return INVALID_JSON_POINTER; - } -} - -} // namespace ondemand -} // namespace haswell -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result( - haswell::ondemand::value &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ -} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { - if (error()) { return error(); } - return first.count_elements(); -} -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} -simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { - if (error()) { return error(); } - return first.at(index); -} -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_inline simdjson_result simdjson_result::end() & noexcept { - if (error()) { return error(); } - return {}; -} - -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { - if (error()) { return error(); } - return first.find_field(key); -} - -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} - -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { - if (error()) { return error(); } - return first[key]; -} - -simdjson_inline simdjson_result simdjson_result::get_array() noexcept { - if (error()) { return error(); } - return first.get_array(); -} -simdjson_inline simdjson_result simdjson_result::get_object() noexcept { - if (error()) { return error(); } - return first.get_object(); -} -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return error(); } - return first.get_uint64(); -} -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { - if (error()) { return error(); } - return first.get_uint64_in_string(); -} -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); -} -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { - if (error()) { return error(); } - return first.get_int64_in_string(); -} -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); -} -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { - if (error()) { return error(); } - return first.get_double_in_string(); -} -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(allow_replacement); -} -template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(receiver, allow_replacement); -} -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { - if (error()) { return error(); } - return first.get_wobbly_string(); -} -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); -} -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); -} -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { - if (error()) { return error(); } - return first.is_null(); -} - -template simdjson_inline simdjson_result simdjson_result::get() noexcept { - if (error()) { return error(); } - return first.get(); -} -template simdjson_inline error_code simdjson_result::get(T &out) noexcept { - if (error()) { return error(); } - return first.get(out); -} - -template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { - if (error()) { return error(); } - return std::move(first); -} -template<> simdjson_inline error_code simdjson_result::get(haswell::ondemand::value &out) noexcept { - if (error()) { return error(); } - out = first; - return SUCCESS; -} - -simdjson_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); -} -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); -} -simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); -} -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); -} -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); -} -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); -} -#if SIMDJSON_EXCEPTIONS -simdjson_inline simdjson_result::operator haswell::ondemand::array() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator haswell::ondemand::object() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator haswell::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -#endif - -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); -} - -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { - if (error()) { return error(); } - return first.raw_json(); -} - -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); -} - -simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { - if (error()) { return error(); } - return first.current_depth(); -} - -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); -} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H -/* end file simdjson/generic/ondemand/value-inl.h for haswell */ -/* including simdjson/generic/ondemand/value_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/value_iterator-inl.h for haswell */ +/* end file simdjson/generic/ondemand/token_iterator-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for ppc64 */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -55695,13 +87370,13 @@ simdjson_inline simdjson_result simdjson_result value_iterator::find_ } else if (!is_open()) { #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. - // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif @@ -55938,7 +87613,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_ #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. - // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif @@ -56280,7 +87955,7 @@ simdjson_inline simdjson_result value_iterator::get_number() noexcept { } simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("is_root_integer"); uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -56295,8 +87970,8 @@ simdjson_inline simdjson_result value_iterator::is_root_integer(bool check return answer; } -simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { - auto max_len = peek_start_length(); +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); auto json = peek_root_scalar("number"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest @@ -56304,7 +87979,12 @@ simdjson_inline simdjson_result value_iterator::get_root_n uint8_t tmpbuf[1074+8+1+1]; tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); return NUMBER_ERROR; } auto answer = numberparsing::get_number_type(tmpbuf); @@ -56312,15 +87992,21 @@ simdjson_inline simdjson_result value_iterator::get_root_n return answer; } simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("number"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. uint8_t tmpbuf[1074+8+1+1]; tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); return NUMBER_ERROR; } number num; @@ -56352,7 +88038,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iter return raw_json_string(json+1); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("uint64"); uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -56368,7 +88054,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::g return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("uint64"); uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -56384,7 +88070,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::g return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -56401,7 +88087,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::ge return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -56418,7 +88104,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::ge return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest @@ -56438,7 +88124,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest @@ -56456,27 +88142,33 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); if(result) { // we have something that looks like a null. if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("null"); + } else if (json[0] == 'n') { + return incorrect_type_error("Not a null but starts with n"); } return result; } @@ -56542,6 +88234,9 @@ simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { return _json_iter->peek_length(start_position()); } +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); +} simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); @@ -56751,150 +88446,716 @@ simdjson_inline error_code value_iterator::report_error(error_code error, const } } // namespace ondemand -} // namespace haswell +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/value_iterator-inl.h for ppc64 */ + + +/* end file simdjson/generic/ondemand/amalgamated.h for ppc64 */ +/* including simdjson/ppc64/end.h: #include "simdjson/ppc64/end.h" */ +/* begin file simdjson/ppc64/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "ppc64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/ppc64/end.h */ + +#endif // SIMDJSON_PPC64_ONDEMAND_H +/* end file simdjson/ppc64/ondemand.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) +/* including simdjson/westmere/ondemand.h: #include "simdjson/westmere/ondemand.h" */ +/* begin file simdjson/westmere/ondemand.h */ +#ifndef SIMDJSON_WESTMERE_ONDEMAND_H +#define SIMDJSON_WESTMERE_ONDEMAND_H + +/* including simdjson/westmere/begin.h: #include "simdjson/westmere/begin.h" */ +/* begin file simdjson/westmere/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "westmere" */ +#define SIMDJSON_IMPLEMENTATION westmere +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). + */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_TARGET_REGION("sse4.2,pclmul,popcnt") +#endif + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/bitmask.h: #include "simdjson/westmere/bitmask.h" */ +/* begin file simdjson/westmere/bitmask.h */ +#ifndef SIMDJSON_WESTMERE_BITMASK_H +#define SIMDJSON_WESTMERE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processing supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMASK_H +/* end file simdjson/westmere/bitmask.h */ +/* including simdjson/westmere/numberparsing_defs.h: #include "simdjson/westmere/numberparsing_defs.h" */ +/* begin file simdjson/westmere/numberparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +#define SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H + +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). + */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace westmere } // namespace simdjson -namespace simdjson { +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +/* end file simdjson/westmere/numberparsing_defs.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; -simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::value_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -} // namespace simdjson + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; -#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/value_iterator-inl.h for haswell */ -/* end file simdjson/generic/ondemand/amalgamated.h for haswell */ -/* including simdjson/haswell/end.h: #include "simdjson/haswell/end.h" */ -/* begin file simdjson/haswell/end.h */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } -#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL -SIMDJSON_UNTARGET_REGION -#endif + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } -/* undefining SIMDJSON_IMPLEMENTATION from "haswell" */ -#undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/haswell/end.h */ + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } -#endif // SIMDJSON_HASWELL_ONDEMAND_H -/* end file simdjson/haswell/ondemand.h */ -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) -/* including simdjson/icelake/ondemand.h: #include "simdjson/icelake/ondemand.h" */ -/* begin file simdjson/icelake/ondemand.h */ -#ifndef SIMDJSON_ICELAKE_ONDEMAND_H -#define SIMDJSON_ICELAKE_ONDEMAND_H + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; -/* including simdjson/icelake/begin.h: #include "simdjson/icelake/begin.h" */ -/* begin file simdjson/icelake/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "icelake" */ -#define SIMDJSON_IMPLEMENTATION icelake -/* including simdjson/icelake/base.h: #include "simdjson/icelake/base.h" */ -/* begin file simdjson/icelake/base.h */ -#ifndef SIMDJSON_ICELAKE_BASE_H -#define SIMDJSON_ICELAKE_BASE_H + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE -namespace simdjson { -/** - * Implementation for Icelake (Intel AVX512). - */ -namespace icelake { + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} -class implementation; + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } -} // namespace icelake -} // namespace simdjson + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } -#endif // SIMDJSON_ICELAKE_BASE_H -/* end file simdjson/icelake/base.h */ -/* including simdjson/icelake/intrinsics.h: #include "simdjson/icelake/intrinsics.h" */ -/* begin file simdjson/icelake/intrinsics.h */ -#ifndef SIMDJSON_ICELAKE_INTRINSICS_H -#define SIMDJSON_ICELAKE_INTRINSICS_H + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } -#if SIMDJSON_VISUAL_STUDIO -// under clang within visual studio, this will include -#include // visual studio or clang -#else -#include // elsewhere -#endif // SIMDJSON_VISUAL_STUDIO + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } -#if SIMDJSON_CLANG_VISUAL_STUDIO -/** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - * e.g., if __AVX2__ is set... in turn, we normally set these - * macros by compiling against the corresponding architecture - * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole - * software with these advanced instructions. In simdjson, we - * want to compile the whole program for a generic target, - * and only target our specific kernels. As a workaround, - * we directly include the needed headers. These headers would - * normally guard against such usage, but we carefully included - * (or ) before, so the headers - * are fooled. - */ -#include // for _blsr_u64 -#include // for __lzcnt64 -#include // for most things (AVX2, AVX512, _popcnt64) -#include -#include -#include -#include -#include // for _mm_clmulepi64_si128 -// Important: we need the AVX-512 headers: -#include -#include -#include -#include -#include -#include -#include -// unfortunately, we may not get _blsr_u64, but, thankfully, clang -// has it as a macro. -#ifndef _blsr_u64 -// we roll our own -#define _blsr_u64(n) ((n - 1) & n) -#endif // _blsr_u64 -#endif // SIMDJSON_CLANG_VISUAL_STUDIO + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } -static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 -#endif // SIMDJSON_ICELAKE_INTRINSICS_H -/* end file simdjson/icelake/intrinsics.h */ +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson -#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE -SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt,popcnt") -#endif +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ +/* including simdjson/westmere/stringparsing_defs.h: #include "simdjson/westmere/stringparsing_defs.h" */ +/* begin file simdjson/westmere/stringparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +#define SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H -/* including simdjson/icelake/bitmanipulation.h: #include "simdjson/icelake/bitmanipulation.h" */ -/* begin file simdjson/icelake/bitmanipulation.h */ -#ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H -#define SIMDJSON_ICELAKE_BITMANIPULATION_H +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace { // We sometimes call trailing_zero on inputs that are zero, @@ -56907,30 +89168,39 @@ SIMDJSON_NO_SANITIZE_UNDEFINED SIMDJSON_NO_SANITIZE_MEMORY simdjson_inline int trailing_zeroes(uint64_t input_num) { #if SIMDJSON_REGULAR_VISUAL_STUDIO - return (int)_tzcnt_u64(input_num); + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; #else // SIMDJSON_REGULAR_VISUAL_STUDIO - //////// - // You might expect the next line to be equivalent to - // return (int)_tzcnt_u64(input_num); - // but the generated code differs and might be less efficient? - //////// return __builtin_ctzll(input_num); #endif // SIMDJSON_REGULAR_VISUAL_STUDIO } /* result might be undefined when input_num is zero */ simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return _blsr_u64(input_num); + return input_num & (input_num-1); } /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { - return int(_lzcnt_u64(input_num)); +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO } #if SIMDJSON_REGULAR_VISUAL_STUDIO simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows + // note: we do not support legacy 32-bit Windows in this kernel return __popcnt64(input_num);// Visual Studio wants two underscores } #else @@ -56951,155 +89221,90 @@ simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, } } // unnamed namespace -} // namespace icelake -} // namespace simdjson - -#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H -/* end file simdjson/icelake/bitmanipulation.h */ -/* including simdjson/icelake/bitmask.h: #include "simdjson/icelake/bitmask.h" */ -/* begin file simdjson/icelake/bitmask.h */ -#ifndef SIMDJSON_ICELAKE_BITMASK_H -#define SIMDJSON_ICELAKE_BITMASK_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace icelake { -namespace { - -// -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. -// -// For example, prefix_xor(00100100) == 00011100 -// -simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { - // There should be no such thing with a processor supporting avx2 - // but not clmul. - __m128i all_ones = _mm_set1_epi8('\xFF'); - __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); - return _mm_cvtsi128_si64(result); -} - -} // unnamed namespace -} // namespace icelake +} // namespace westmere } // namespace simdjson -#endif // SIMDJSON_ICELAKE_BITMASK_H -/* end file simdjson/icelake/bitmask.h */ -/* including simdjson/icelake/simd.h: #include "simdjson/icelake/simd.h" */ -/* begin file simdjson/icelake/simd.h */ -#ifndef SIMDJSON_ICELAKE_SIMD_H -#define SIMDJSON_ICELAKE_SIMD_H +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#if defined(__GNUC__) && !defined(__clang__) -#if __GNUC__ == 8 -#define SIMDJSON_GCC8 1 -#endif // __GNUC__ == 8 -#endif // defined(__GNUC__) && !defined(__clang__) - -#if SIMDJSON_GCC8 -/** - * GCC 8 fails to provide _mm512_set_epi8. We roll our own. - */ -inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { - return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), - uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), - uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), - uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), - uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), - uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), - uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), - uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); -} -#endif // SIMDJSON_GCC8 - - - namespace simdjson { -namespace icelake { +namespace westmere { namespace { namespace simd { - // Forward-declared so they can be used by splat and friends. template struct base { - __m512i value; + __m128i value; // Zero constructor - simdjson_inline base() : value{__m512i()} {} + simdjson_inline base() : value{__m128i()} {} // Conversion from SIMD register - simdjson_inline base(const __m512i _value) : value(_value) {} + simdjson_inline base(const __m128i _value) : value(_value) {} // Conversion to SIMD register - simdjson_inline operator const __m512i&() const { return this->value; } - simdjson_inline operator __m512i&() { return this->value; } + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } // Bit operations - simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } - simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } - simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } - simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } }; - // Forward-declared so they can be used by splat and friends. - template - struct simd8; - template> struct base8: base> { - typedef uint32_t bitmask_t; - typedef uint64_t bitmask2_t; + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; simdjson_inline base8() : base>() {} - simdjson_inline base8(const __m512i _value) : base>(_value) {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} - friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { - return _mm512_cmpeq_epi8_mask(lhs, rhs); - } + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } - static const int SIZE = sizeof(base::value); + static const int SIZE = sizeof(base>::value); template simdjson_inline simd8 prev(const simd8 prev_chunk) const { - // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) - constexpr int shift = 16 - N; - return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); } }; // SIMD byte mask type (returned by things like eq and gt) template<> struct simd8: base8 { - static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m512i _value) : base8(_value) {} + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} // Splat constructor - simdjson_inline simd8(bool _value) : base8(splat(_value)) {} - simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } simdjson_inline simd8 operator~() const { return *this ^ true; } }; template struct base8_numeric: base8 { - static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } - static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } - static simdjson_inline simd8 load(const T values[64]) { - return _mm512_loadu_si512(reinterpret_cast(values)); + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); } // Repeat 16 values as many times as necessary (usually for lookup tables) static simdjson_inline simd8 repeat_16( @@ -57107,48 +89312,68 @@ namespace simd { T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 ) { return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } simdjson_inline base8_numeric() : base8() {} - simdjson_inline base8_numeric(const __m512i _value) : base8(_value) {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} // Store to array - simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } - // Override to distinguish from bool version - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm512_shuffle_epi8(lookup_table, *this); + return _mm_shuffle_epi8(lookup_table, *this); } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes // get written. // Design consideration: it seems like a function with the // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. template - simdjson_inline void compress(uint64_t mask, L * output) const { - _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); } template @@ -57170,213 +89395,181 @@ namespace simd { template<> struct simd8 : base8_numeric { simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, - int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, - int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, - int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, - int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, - int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, - int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 - ) : simd8(_mm512_set_epi8( - v63, v62, v61, v60, v59, v58, v57, v56, - v55, v54, v53, v52, v51, v50, v49, v48, - v47, v46, v45, v44, v43, v42, v41, v40, - v39, v38, v37, v36, v35, v34, v33, v32, - v31, v30, v29, v28, v27, v26, v25, v24, - v23, v22, v21, v20, v19, v18, v17, v16, - v15, v14, v13, v12, v11, v10, v9, v8, - v7, v6, v5, v4, v3, v2, v1, v0 + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Order-sensitive comparisons - simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } - - simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } - simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } }; // Unsigned bytes template<> struct simd8: base8_numeric { simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, - uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, - uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, - uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, - uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, - uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, - uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 - ) : simd8(_mm512_set_epi8( - v63, v62, v61, v60, v59, v58, v57, v56, - v55, v54, v53, v52, v51, v50, v49, v48, - v47, v46, v45, v44, v43, v42, v41, v40, - v39, v38, v37, v36, v35, v34, v33, v32, - v31, v30, v29, v28, v27, v26, v25, v24, - v23, v22, v21, v20, v19, v18, v17, v16, - v15, v14, v13, v12, v11, v10, v9, v8, - v7, v6, v5, v4, v3, v2, v1, v0 + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Saturated math - simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } - simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } // Order-specific operations - simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } // Same as >, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } // Bit-specific operations - simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - - simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } - simdjson_inline bool bits_not_set_anywhere() const { - return !_mm512_test_epi8_mask(*this, *this); - } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } template - simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } template - simdjson_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } // Get one of the bits and make a bitmask out of it. // e.g. value.get_bit<7>() gets the high bit template - simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } }; template struct simd8x64 { static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); const simd8 chunks[NUM_CHUNKS]; simd8x64(const simd8x64& o) = delete; // no copy allowed simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} - simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} - simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} - - simdjson_inline uint64_t compress(uint64_t mask, T * output) const { - this->chunks[0].compress(mask, output); - return 64 - count_ones(mask); - } + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); } simdjson_inline simd8 reduce_or() const { - return this->chunks[0]; + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); } - simdjson_inline simd8x64 bit_or(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] | mask - ); + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); } simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); - return this->chunks[0] == mask; + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); } simdjson_inline uint64_t eq(const simd8x64 &other) const { - return this->chunks[0] == other.chunks[0]; + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); } simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); - return this->chunks[0] <= mask; + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); } }; // struct simd8x64 } // namespace simd - } // unnamed namespace -} // namespace icelake +} // namespace westmere } // namespace simdjson -#endif // SIMDJSON_ICELAKE_SIMD_H -/* end file simdjson/icelake/simd.h */ -/* including simdjson/icelake/stringparsing_defs.h: #include "simdjson/icelake/stringparsing_defs.h" */ -/* begin file simdjson/icelake/stringparsing_defs.h */ -#ifndef SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H -#define SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/simd.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ namespace simdjson { -namespace icelake { +namespace westmere { namespace { using namespace simd; @@ -57384,107 +89577,49 @@ using namespace simd; // Holds backslashes and quotes locations. struct backslash_and_quote { public: - static constexpr uint32_t BYTES_PROCESSED = 64; + static constexpr uint32_t BYTES_PROCESSED = 32; simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } - uint64_t bs_bits; - uint64_t quote_bits; + uint32_t bs_bits; + uint32_t quote_bits; }; // struct backslash_and_quote simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // this can read up to 15 bytes beyond the buffer size, but we require + // this can read up to 31 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); - simd8 v(src); - // store to dest unconditionally - we can overwrite the bits we don't like later - v.store(dst); + simd8 v0(src); + simd8 v1(src + 16); + v0.store(dst); + v1.store(dst + 16); + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); return { - static_cast(v == '\\'), // bs_bits - static_cast(v == '"'), // quote_bits + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits }; } } // unnamed namespace -} // namespace icelake -} // namespace simdjson - -#endif // SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H -/* end file simdjson/icelake/stringparsing_defs.h */ -/* including simdjson/icelake/numberparsing_defs.h: #include "simdjson/icelake/numberparsing_defs.h" */ -/* begin file simdjson/icelake/numberparsing_defs.h */ -#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H -#define SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace icelake { -namespace numberparsing { - -static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - // this actually computes *16* values so we are being wasteful. - const __m128i ascii0 = _mm_set1_epi8('0'); - const __m128i mul_1_10 = - _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); - const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); - const __m128i mul_1_10000 = - _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); - const __m128i input = _mm_sub_epi8( - _mm_loadu_si128(reinterpret_cast(chars)), ascii0); - const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); - const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); - const __m128i t3 = _mm_packus_epi32(t2, t2); - const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); - return _mm_cvtsi128_si32( - t4); // only captures the sum of the first 8 digits, drop the rest -} - -/** @private */ -simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { - internal::value128 answer; -#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); -#endif - return answer; -} - -} // namespace numberparsing -} // namespace icelake +} // namespace westmere } // namespace simdjson -#define SIMDJSON_SWAR_NUMBER_PARSING 1 - -#endif // SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H -/* end file simdjson/icelake/numberparsing_defs.h */ -/* end file simdjson/icelake/begin.h */ -/* including simdjson/generic/ondemand/amalgamated.h for icelake: #include "simdjson/generic/ondemand/amalgamated.h" */ -/* begin file simdjson/generic/ondemand/amalgamated.h for icelake */ +#endif // SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +/* end file simdjson/westmere/stringparsing_defs.h */ +/* end file simdjson/westmere/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for westmere: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for westmere */ #if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) #error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! #endif // Stuff other things depend on -/* including simdjson/generic/ondemand/base.h for icelake: #include "simdjson/generic/ondemand/base.h" */ -/* begin file simdjson/generic/ondemand/base.h for icelake */ +/* including simdjson/generic/ondemand/base.h for westmere: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -57493,7 +89628,7 @@ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { /** * A fast, simple, DOM-like interface that parses JSON as you use it. * @@ -57504,8 +89639,8 @@ namespace ondemand { /** Represents the depth of a JSON value (number of nested arrays/objects). */ using depth_t = int32_t; -/** @copydoc simdjson::icelake::number_type */ -using number_type = simdjson::icelake::number_type; +/** @copydoc simdjson::westmere::number_type */ +using number_type = simdjson::westmere::number_type; /** @private Position in the JSON buffer indexes */ using token_position = const uint32_t *; @@ -57528,13 +89663,139 @@ class value; class value_iterator; } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H -/* end file simdjson/generic/ondemand/base.h for icelake */ -/* including simdjson/generic/ondemand/value_iterator.h for icelake: #include "simdjson/generic/ondemand/value_iterator.h" */ -/* begin file simdjson/generic/ondemand/value_iterator.h for icelake */ +/* end file simdjson/generic/ondemand/base.h for westmere */ +/* including simdjson/generic/ondemand/deserialize.h for westmere: #include "simdjson/generic/ondemand/deserialize.h" */ +/* begin file simdjson/generic/ondemand/deserialize.h for westmere */ +#if SIMDJSON_SUPPORTS_DESERIALIZATION + +#ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_ONDEMAND_DESERIALIZE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +namespace simdjson { + +namespace tag_invoke_fn_ns { +void tag_invoke(); + +struct tag_invoke_fn { + template + requires requires(Tag tag, Args &&...args) { + tag_invoke(std::forward(tag), std::forward(args)...); + } + constexpr auto operator()(Tag tag, Args &&...args) const + noexcept(noexcept(tag_invoke(std::forward(tag), + std::forward(args)...))) + -> decltype(tag_invoke(std::forward(tag), + std::forward(args)...)) { + return tag_invoke(std::forward(tag), std::forward(args)...); + } +}; +} // namespace tag_invoke_fn_ns + +inline namespace tag_invoke_ns { +inline constexpr tag_invoke_fn_ns::tag_invoke_fn tag_invoke = {}; +} // namespace tag_invoke_ns + +template +concept tag_invocable = requires(Tag tag, Args... args) { + tag_invoke(std::forward(tag), std::forward(args)...); +}; + +template +concept nothrow_tag_invocable = + tag_invocable && requires(Tag tag, Args... args) { + { + tag_invoke(std::forward(tag), std::forward(args)...) + } noexcept; + }; + +template +using tag_invoke_result = + std::invoke_result; + +template +using tag_invoke_result_t = + std::invoke_result_t; + +template using tag_t = std::decay_t; + + +struct deserialize_tag; + +/// These types are deserializable in a built-in way +template struct is_builtin_deserializable : std::false_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; + +template +concept is_builtin_deserializable_v = is_builtin_deserializable::value; + +template +concept custom_deserializable = tag_invocable; + +template +concept deserializable = custom_deserializable || is_builtin_deserializable_v; + +template +concept nothrow_custom_deserializable = nothrow_tag_invocable; + +// built-in types are noexcept and if an error happens, the value simply gets ignored and the error is returned. +template +concept nothrow_deserializable = nothrow_custom_deserializable || is_builtin_deserializable_v; + +/// Deserialize Tag +inline constexpr struct deserialize_tag { + using value_type = westmere::ondemand::value; + using document_type = westmere::ondemand::document; + using document_reference_type = westmere::ondemand::document_reference; + + // Customization Point for value + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for document + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for document reference + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + +} deserialize{}; + +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_DESERIALIZE_H +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + +/* end file simdjson/generic/ondemand/deserialize.h for westmere */ +/* including simdjson/generic/ondemand/value_iterator.h for westmere: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -57544,7 +89805,7 @@ class value_iterator; /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { /** @@ -57662,7 +89923,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -57672,7 +89934,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -57794,7 +90057,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -57804,7 +90068,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -57912,6 +90177,7 @@ class value_iterator { simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; simdjson_inline const uint8_t *peek_start() const noexcept; simdjson_inline uint32_t peek_start_length() const noexcept; + simdjson_inline uint32_t peek_root_length() const noexcept; /** * The general idea of the advance_... methods and the peek_* methods @@ -58006,18 +90272,19 @@ class value_iterator { friend class object; friend class array; friend class value; + friend class field; }; // value_iterator } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public icelake::implementation_simdjson_result_base { +struct simdjson_result : public westmere::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(icelake::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(westmere::ondemand::value_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; @@ -58025,9 +90292,9 @@ struct simdjson_result : public icelake::impl } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H -/* end file simdjson/generic/ondemand/value_iterator.h for icelake */ -/* including simdjson/generic/ondemand/value.h for icelake: #include "simdjson/generic/ondemand/value.h" */ -/* begin file simdjson/generic/ondemand/value.h for icelake */ +/* end file simdjson/generic/ondemand/value_iterator.h for westmere */ +/* including simdjson/generic/ondemand/value.h for westmere: #include "simdjson/generic/ondemand/value.h" */ +/* begin file simdjson/generic/ondemand/value.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -58035,12 +90302,15 @@ struct simdjson_result : public icelake::impl /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include + namespace simdjson { -namespace icelake { -namespace ondemand { +namespace westmere { +namespace ondemand { /** * An ephemeral JSON value returned during iteration. It is only valid for as long as you do * not access more data in the JSON document. @@ -58065,15 +90335,21 @@ class value { * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ - template simdjson_inline simdjson_result get() noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); + template + simdjson_inline simdjson_result get() +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; } + /** * Get this value as the given type. * @@ -58083,7 +90359,38 @@ class value { * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ - template simdjson_inline error_code get(T &out) noexcept; + template + simdjson_inline error_code get(T &out) +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + #if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif + } /** * Cast this JSON value to an array. @@ -58159,6 +90466,17 @@ class value { * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * + * In some instances, you may want to allow replacement of invalid Unicode sequences. + * You may do so by passing the allow_replacement parameter as true. In the following + * example, the string "431924697b\udff0L\u0001Y" is not valid Unicode. By passing true + * to get_string, we allow the replacement of the invalid Unicode sequences with the Unicode + * replacement character (U+FFFD). + * + * simdjson::ondemand::parser parser; + * auto json = R"({"deviceId":"431924697b\udff0L\u0001Y"})"_padded; + * simdjson::ondemand::document doc = parser.iterate(json); + * auto view = doc["deviceId"].get_string(true); + * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. @@ -58224,6 +90542,17 @@ class value { simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.). + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); /** * Cast this JSON value to an array. * @@ -58387,7 +90716,7 @@ class value { * that only one field is returned. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field as not there when they are not in order). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. @@ -58399,6 +90728,7 @@ class value { simdjson_inline simdjson_result operator[](std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_result operator[](int) noexcept = delete; /** * Get the type of this JSON value. It does not validate or consume the value. @@ -58423,6 +90753,13 @@ class value { * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the value is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; /** * Checks whether the value is a negative number. @@ -58455,10 +90792,12 @@ class value { * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have - * an integer greater or equal to 9223372036854775808 + * an integer greater or equal to 9223372036854775808. * get_number_type() is number_type::signed_integer if we have an - * integer that is less than 9223372036854775808 - * Otherwise, get_number_type() has value number_type::floating_point_number + * integer that is less than 9223372036854775808. + * get_number_type() is number_type::big_integer for integers that do not fit in 64 bits, + * in which case the digit_count is set to the length of the big integer string. + * Otherwise, get_number_type() has value number_type::floating_point_number. * * This function requires processing the number string, but it is expected * to be faster than get_number().get_number_type() because it is does not @@ -58485,6 +90824,8 @@ class value { * You can recover the value by calling number.get_uint64() and you * have that number.is_uint64() is true. * + * For integers that do not fit in 64 bits, the function returns BIGINT_ERROR error code. + * * Otherwise, number.get_number_type() has value number_type::floating_point_number * and we have a binary64 number. * You can recover the value by calling number.get_double() and you @@ -58500,7 +90841,6 @@ class value { */ simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; - /** * Get the raw JSON for this token. * @@ -58598,6 +90938,20 @@ class value { */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + + protected: /** * Create a value. @@ -58637,23 +90991,24 @@ class value { friend class object; friend struct simdjson_result; friend struct simdjson_result; + friend class field; }; } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public icelake::implementation_simdjson_result_base { +struct simdjson_result : public westmere::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(icelake::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(westmere::ondemand::value &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; - simdjson_inline simdjson_result get_array() noexcept; - simdjson_inline simdjson_result get_object() noexcept; + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; @@ -58665,7 +91020,7 @@ struct simdjson_result : public icelake::implementatio template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; - simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result is_null() noexcept; @@ -58674,20 +91029,22 @@ struct simdjson_result : public icelake::implementatio template simdjson_inline error_code get(T &out) noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_inline operator icelake::ondemand::array() noexcept(false); - simdjson_inline operator icelake::ondemand::object() noexcept(false); + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator westmere::ondemand::array() noexcept(false); + simdjson_inline operator westmere::ondemand::object() noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); - simdjson_inline operator icelake::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator westmere::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; - simdjson_inline simdjson_result at(size_t index) noexcept; - simdjson_inline simdjson_result begin() & noexcept; - simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). @@ -58709,9 +91066,9 @@ struct simdjson_result : public icelake::implementatio * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ - simdjson_inline simdjson_result find_field(std::string_view key) noexcept; - /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ - simdjson_inline simdjson_result find_field(const char *key) noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. @@ -58727,18 +91084,19 @@ struct simdjson_result : public icelake::implementatio * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field as not there when they are not in order). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ - simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; - /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; - /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_inline simdjson_result operator[](std::string_view key) noexcept; - /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_result operator[](int) noexcept = delete; /** * Get the type of this JSON value. @@ -58747,12 +91105,13 @@ struct simdjson_result : public icelake::implementatio * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). */ - simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; - simdjson_inline simdjson_result get_number_type() noexcept; - simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; @@ -58762,15 +91121,16 @@ struct simdjson_result : public icelake::implementatio simdjson_inline simdjson_result current_location() noexcept; /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ simdjson_inline simdjson_result current_depth() const noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H -/* end file simdjson/generic/ondemand/value.h for icelake */ -/* including simdjson/generic/ondemand/logger.h for icelake: #include "simdjson/generic/ondemand/logger.h" */ -/* begin file simdjson/generic/ondemand/logger.h for icelake */ +/* end file simdjson/generic/ondemand/value.h for westmere */ +/* including simdjson/generic/ondemand/logger.h for westmere: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -58779,7 +91139,7 @@ struct simdjson_result : public icelake::implementatio /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { // Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical @@ -58825,13 +91185,13 @@ static inline void log_error(const value_iterator &iter, const char *error, cons } // namespace logger } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H -/* end file simdjson/generic/ondemand/logger.h for icelake */ -/* including simdjson/generic/ondemand/token_iterator.h for icelake: #include "simdjson/generic/ondemand/token_iterator.h" */ -/* begin file simdjson/generic/ondemand/token_iterator.h for icelake */ +/* end file simdjson/generic/ondemand/logger.h for westmere */ +/* including simdjson/generic/ondemand/token_iterator.h for westmere: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -58842,7 +91202,7 @@ static inline void log_error(const value_iterator &iter, const char *error, cons /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { /** @@ -58881,7 +91241,7 @@ class token_iterator { * 1 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** @@ -58911,7 +91271,14 @@ class token_iterator { * @param position The position of the token. */ simdjson_inline uint32_t peek_length(token_position position) const noexcept; - + /** + * Get the maximum length of the JSON text for a root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token (start of the document). + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; /** * Return the current index. */ @@ -58966,15 +91333,15 @@ class token_iterator { }; } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public icelake::implementation_simdjson_result_base { +struct simdjson_result : public westmere::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(icelake::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(westmere::ondemand::token_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private @@ -58983,9 +91350,9 @@ struct simdjson_result : public icelake::impl } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H -/* end file simdjson/generic/ondemand/token_iterator.h for icelake */ -/* including simdjson/generic/ondemand/json_iterator.h for icelake: #include "simdjson/generic/ondemand/json_iterator.h" */ -/* begin file simdjson/generic/ondemand/json_iterator.h for icelake */ +/* end file simdjson/generic/ondemand/token_iterator.h for westmere */ +/* including simdjson/generic/ondemand/json_iterator.h for westmere: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -58996,7 +91363,7 @@ struct simdjson_result : public icelake::impl /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { /** @@ -59134,7 +91501,7 @@ class json_iterator { * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used ... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** @@ -59162,7 +91529,7 @@ class json_iterator { * @param position The position of the token to retrieve. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used ... */ simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** @@ -59173,13 +91540,21 @@ class json_iterator { * @param position The position of the token to retrieve. */ simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; /** * Get the JSON text for the last token in the document. * * This is not null-terminated; it is a view into the JSON. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used ... */ simdjson_inline const uint8_t *peek_last() const noexcept; @@ -59273,6 +91648,9 @@ class json_iterator { inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. @@ -59288,6 +91666,7 @@ class json_iterator { friend class raw_json_string; friend class parser; friend class value_iterator; + friend class field; template friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; template @@ -59295,15 +91674,15 @@ class json_iterator { }; // json_iterator } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public icelake::implementation_simdjson_result_base { +struct simdjson_result : public westmere::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(icelake::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(westmere::ondemand::json_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -59312,9 +91691,9 @@ struct simdjson_result : public icelake::imple } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H -/* end file simdjson/generic/ondemand/json_iterator.h for icelake */ -/* including simdjson/generic/ondemand/json_type.h for icelake: #include "simdjson/generic/ondemand/json_type.h" */ -/* begin file simdjson/generic/ondemand/json_type.h for icelake */ +/* end file simdjson/generic/ondemand/json_iterator.h for westmere */ +/* including simdjson/generic/ondemand/json_type.h for westmere: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -59325,7 +91704,7 @@ struct simdjson_result : public icelake::imple /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { /** @@ -59458,15 +91837,15 @@ inline std::ostream& operator<<(std::ostream& out, simdjson_result &t #endif } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public icelake::implementation_simdjson_result_base { +struct simdjson_result : public westmere::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(icelake::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(westmere::ondemand::json_type &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private @@ -59475,9 +91854,9 @@ struct simdjson_result : public icelake::implement } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H -/* end file simdjson/generic/ondemand/json_type.h for icelake */ -/* including simdjson/generic/ondemand/raw_json_string.h for icelake: #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* begin file simdjson/generic/ondemand/raw_json_string.h for icelake */ +/* end file simdjson/generic/ondemand/json_type.h for westmere */ +/* including simdjson/generic/ondemand/raw_json_string.h for westmere: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -59487,7 +91866,7 @@ struct simdjson_result : public icelake::implement /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { /** @@ -59663,30 +92042,30 @@ simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_js } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public icelake::implementation_simdjson_result_base { +struct simdjson_result : public westmere::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(icelake::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(westmere::ondemand::raw_json_string &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; - simdjson_inline simdjson_warn_unused simdjson_result unescape(icelake::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; - simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(icelake::ondemand::json_iterator &iter) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(westmere::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(westmere::ondemand::json_iterator &iter) const noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H -/* end file simdjson/generic/ondemand/raw_json_string.h for icelake */ -/* including simdjson/generic/ondemand/parser.h for icelake: #include "simdjson/generic/ondemand/parser.h" */ -/* begin file simdjson/generic/ondemand/parser.h for icelake */ +/* end file simdjson/generic/ondemand/raw_json_string.h for westmere */ +/* including simdjson/generic/ondemand/parser.h for westmere: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -59698,12 +92077,12 @@ struct simdjson_result : public icelake::imp #include namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -59773,6 +92152,22 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * + * ### std::string references + * + * If you pass a mutable std::string reference (std::string&), the parser will seek to extend + * its capacity to SIMDJSON_PADDING bytes beyond the end of the string. + * + * Whenever you pass an std::string reference, the parser will access the bytes beyond the end of + * the string but before the end of the allocated memory (std::string::capacity()). + * If you are using a sanitizer that checks for reading uninitialized bytes or std::string's + * container-overflow checks, you may encounter sanitizer warnings. + * You can safely ignore these warnings. Or you can call simdjson::pad(std::string&) to pad the + * string with SIMDJSON_PADDING spaces: this function returns a simdjson::padding_string_view + * which can be be passed to the parser's iterate function: + * + * std::string json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"; + * document doc = parser.iterate(simdjson::pad(json)); + * * @param json The JSON to parse. * @param len The length of the JSON. * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). @@ -59787,6 +92182,9 @@ class parser { * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ @@ -59931,9 +92329,9 @@ class parser { simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). */ - simdjson_inline size_t capacity() const noexcept; + simdjson_pure simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ - simdjson_inline size_t max_capacity() const noexcept; + simdjson_pure simdjson_inline size_t max_capacity() const noexcept; simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). @@ -59941,7 +92339,7 @@ class parser { * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. */ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -59964,8 +92362,12 @@ class parser { * behavior of the parser for future operations. */ bool threaded{true}; + #else + /** + * When SIMDJSON_THREADS_ENABLED is not defined, the parser instance cannot use threads. + */ + bool threaded{false}; #endif - /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. * The result must be valid UTF-8. @@ -60013,9 +92415,20 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * the string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and usafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ - std::unique_ptr implementation{}; + std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; @@ -60029,15 +92442,15 @@ class parser { }; } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public icelake::implementation_simdjson_result_base { +struct simdjson_result : public westmere::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(icelake::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(westmere::ondemand::parser &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; @@ -60045,11 +92458,11 @@ struct simdjson_result : public icelake::implementati } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H -/* end file simdjson/generic/ondemand/parser.h for icelake */ +/* end file simdjson/generic/ondemand/parser.h for westmere */ // All other declarations -/* including simdjson/generic/ondemand/array.h for icelake: #include "simdjson/generic/ondemand/array.h" */ -/* begin file simdjson/generic/ondemand/array.h for icelake */ +/* including simdjson/generic/ondemand/array.h for westmere: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -60060,7 +92473,7 @@ struct simdjson_result : public icelake::implementati /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { /** @@ -60096,7 +92509,8 @@ class array { * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer - * safe to continue. + * safe to continue. Note that count_elements() does not validate the JSON values, + * only the structure of the array. * * To check that an array is empty, it is more performant to use * the is_empty() method. @@ -60152,6 +92566,22 @@ class array { * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. @@ -60223,25 +92653,26 @@ class array { }; } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public icelake::implementation_simdjson_result_base { +struct simdjson_result : public westmere::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(icelake::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(westmere::ondemand::array &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; - simdjson_inline simdjson_result begin() noexcept; - simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; inline simdjson_result count_elements() & noexcept; inline simdjson_result is_empty() & noexcept; inline simdjson_result reset() & noexcept; - simdjson_inline simdjson_result at(size_t index) noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; }; @@ -60249,9 +92680,9 @@ struct simdjson_result : public icelake::implementatio } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H -/* end file simdjson/generic/ondemand/array.h for icelake */ -/* including simdjson/generic/ondemand/array_iterator.h for icelake: #include "simdjson/generic/ondemand/array_iterator.h" */ -/* begin file simdjson/generic/ondemand/array_iterator.h for icelake */ +/* end file simdjson/generic/ondemand/array.h for westmere */ +/* including simdjson/generic/ondemand/array_iterator.h for westmere: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -60263,7 +92694,7 @@ struct simdjson_result : public icelake::implementatio namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { /** @@ -60323,15 +92754,15 @@ class array_iterator { }; } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public icelake::implementation_simdjson_result_base { +struct simdjson_result : public westmere::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(icelake::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(westmere::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -60339,28 +92770,31 @@ struct simdjson_result : public icelake::impl // Iterator interface // - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. - simdjson_inline bool operator==(const simdjson_result &) const noexcept; - simdjson_inline bool operator!=(const simdjson_result &) const noexcept; - simdjson_inline simdjson_result &operator++() noexcept; + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H -/* end file simdjson/generic/ondemand/array_iterator.h for icelake */ -/* including simdjson/generic/ondemand/document.h for icelake: #include "simdjson/generic/ondemand/document.h" */ -/* begin file simdjson/generic/ondemand/document.h for icelake */ +/* end file simdjson/generic/ondemand/array_iterator.h for westmere */ +/* including simdjson/generic/ondemand/document.h for westmere: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { /** @@ -60531,22 +92965,39 @@ class document { * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ - template simdjson_inline simdjson_result get() & noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); + template + simdjson_inline simdjson_result get() & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "Cannot initialize the specified type."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; } - /** @overload template simdjson_result get() & noexcept */ - template simdjson_inline simdjson_result get() && noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); + /** + * @overload template simdjson_result get() & noexcept + * + * We disallow the use tag_invoke CPO on a moved document; it may create UB + * if user uses `ondemand::array` or `ondemand::object` in their custom type. + * + * The member function is still remains specialize-able for compatibility + * reasons, but we completely disallow its use when a tag_invoke customization + * is provided. + */ + template + simdjson_inline simdjson_result get() && +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(!std::is_same::value && !std::is_same::value, "You should never hold either an ondemand::array or ondemand::object without a corresponding ondemand::document being alive; that would be Undefined Behaviour."); + return static_cast(*this).get(); } /** @@ -60560,11 +93011,57 @@ class document { * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ - template simdjson_inline error_code get(T &out) & noexcept; + template + simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { +#if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + } + /** @overload template error_code get(T &out) & noexcept */ - template simdjson_inline error_code get(T &out) && noexcept; + template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.) + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() & noexcept(false); + template + explicit simdjson_deprecated simdjson_inline operator T() && noexcept(false); + /** * Cast this JSON value to an array. * @@ -60648,7 +93145,8 @@ class document { * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer - * safe to continue. + * safe to continue. Note that count_elements() does not validate the JSON values, + * only the structure of the array. */ simdjson_inline simdjson_result count_elements() & noexcept; /** @@ -60737,7 +93235,7 @@ class document { * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field was not there when they are not in order). * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. E.g., the array @@ -60760,6 +93258,7 @@ class document { simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; /** * Get the type of this JSON value. It does not validate or consume the value. @@ -60783,6 +93282,14 @@ class document { */ simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the document is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + /** * Checks whether the document is a negative number. * @@ -60810,9 +93317,11 @@ class document { * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have - * an integer greater or equal to 9223372036854775808 + * an integer greater or equal to 9223372036854775808 and no larger than 18446744073709551615. * get_number_type() is number_type::signed_integer if we have an - * integer that is less than 9223372036854775808 + * integer that is less than 9223372036854775808 and greater or equal to -9223372036854775808. + * get_number_type() is number_type::big_integer if we have an integer outside + * of those ranges (either larger than 18446744073709551615 or smaller than -9223372036854775808). * Otherwise, get_number_type() has value number_type::floating_point_number * * This function requires processing the number string, but it is expected @@ -60930,6 +93439,14 @@ class document { * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/\\u00E9") == 123 + * doc.at_pointer((const char*)u8"/\u00E9") returns an error (NO_SUCH_FIELD) + * * Note that at_pointer() automatically calls rewind between each call. Thus * all values, objects and arrays that you have created so far (including unescaped strings) * are invalidated. After calling at_pointer, you need to consume the result: string values @@ -60946,6 +93463,30 @@ class document { * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_path(".\\u00E9") == 123 + * doc.at_path((const char*)u8".\u00E9") returns an error (NO_SUCH_FIELD) + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing @@ -60986,6 +93527,11 @@ class document { /** * A document_reference is a thin wrapper around a document reference instance. + * The document_reference instances are used primarily/solely for streams of JSON + * documents. They differ from document instances when parsing a scalar value + * (a document that is not an array or an object). In the case of a document, + * we expect the document to be fully consumed. In the case of a document_reference, + * we allow trailing content. */ class document_reference { public: @@ -61011,10 +93557,82 @@ class document_reference { simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; + template + simdjson_inline simdjson_result get() & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "Cannot initialize the specified type."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + template + simdjson_inline simdjson_result get() && +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(!std::is_same::value && !std::is_same::value, "You should never hold either an ondemand::array or ondemand::object without a corresponding ondemand::document_reference being alive; that would be Undefined Behaviour."); + return static_cast(*this).get(); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { +#if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + } + + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; simdjson_inline operator document&() const noexcept; - #if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator array() & noexcept(false); simdjson_inline operator object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); @@ -61034,11 +93652,13 @@ class document_reference { simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; @@ -61048,25 +93668,27 @@ class document_reference { simdjson_inline simdjson_result get_number() noexcept; simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + private: document *doc{nullptr}; }; } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public icelake::implementation_simdjson_result_base { +struct simdjson_result : public westmere::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(icelake::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(westmere::ondemand::document &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; - simdjson_inline simdjson_result get_array() & noexcept; - simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; @@ -61077,52 +93699,56 @@ struct simdjson_result : public icelake::implementa template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; - simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; - simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & noexcept; - template simdjson_inline simdjson_result get() && noexcept; + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; - #if SIMDJSON_EXCEPTIONS - simdjson_inline operator icelake::ondemand::array() & noexcept(false); - simdjson_inline operator icelake::ondemand::object() & noexcept(false); + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator westmere::ondemand::array() & noexcept(false); + simdjson_inline operator westmere::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); - simdjson_inline operator icelake::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator westmere::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); - simdjson_inline operator icelake::ondemand::value() noexcept(false); + simdjson_inline operator westmere::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; - simdjson_inline simdjson_result at(size_t index) & noexcept; - simdjson_inline simdjson_result begin() & noexcept; - simdjson_inline simdjson_result end() & noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field(const char *key) & noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_inline simdjson_result operator[](const char *key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; - simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; simdjson_inline bool at_end() const noexcept; simdjson_inline bool is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; - simdjson_inline simdjson_result get_number_type() noexcept; - simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; }; @@ -61133,14 +93759,14 @@ struct simdjson_result : public icelake::implementa namespace simdjson { template<> -struct simdjson_result : public icelake::implementation_simdjson_result_base { +struct simdjson_result : public westmere::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(icelake::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result(westmere::ondemand::document_reference value, error_code error) noexcept; simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; - simdjson_inline simdjson_result get_array() & noexcept; - simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; @@ -61151,54 +93777,64 @@ struct simdjson_result : public icelake:: template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; - simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; - simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_inline operator icelake::ondemand::array() & noexcept(false); - simdjson_inline operator icelake::ondemand::object() & noexcept(false); + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator westmere::ondemand::array() & noexcept(false); + simdjson_inline operator westmere::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); - simdjson_inline operator icelake::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator westmere::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); - simdjson_inline operator icelake::ondemand::value() noexcept(false); + simdjson_inline operator westmere::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; - simdjson_inline simdjson_result at(size_t index) & noexcept; - simdjson_inline simdjson_result begin() & noexcept; - simdjson_inline simdjson_result end() & noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field(const char *key) & noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_inline simdjson_result operator[](const char *key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; - simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; - simdjson_inline simdjson_result get_number_type() noexcept; - simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H -/* end file simdjson/generic/ondemand/document.h for icelake */ -/* including simdjson/generic/ondemand/document_stream.h for icelake: #include "simdjson/generic/ondemand/document_stream.h" */ -/* begin file simdjson/generic/ondemand/document_stream.h for icelake */ +/* end file simdjson/generic/ondemand/document.h for westmere */ +/* including simdjson/generic/ondemand/document_stream.h for westmere: #include "simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -61216,7 +93852,7 @@ struct simdjson_result : public icelake:: #endif namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED @@ -61323,10 +93959,9 @@ class document_stream { class iterator { public: using value_type = simdjson_result; - using reference = value_type; - + using reference = simdjson_result; + using pointer = void; using difference_type = std::ptrdiff_t; - using iterator_category = std::input_iterator_tag; /** @@ -61336,7 +93971,7 @@ class document_stream { /** * Get the current document (or error). */ - simdjson_inline simdjson_result operator*() noexcept; + simdjson_inline reference operator*() noexcept; /** * Advance to the next document (prefix). */ @@ -61518,18 +94153,18 @@ class document_stream { friend class document; friend class json_iterator; friend struct simdjson_result; - friend struct internal::simdjson_result_base; + friend struct simdjson::internal::simdjson_result_base; }; // document_stream } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public icelake::implementation_simdjson_result_base { +struct simdjson_result : public westmere::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(icelake::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(westmere::ondemand::document_stream &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; @@ -61537,9 +94172,9 @@ struct simdjson_result : public icelake::imp } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H -/* end file simdjson/generic/ondemand/document_stream.h for icelake */ -/* including simdjson/generic/ondemand/field.h for icelake: #include "simdjson/generic/ondemand/field.h" */ -/* begin file simdjson/generic/ondemand/field.h for icelake */ +/* end file simdjson/generic/ondemand/document_stream.h for westmere */ +/* including simdjson/generic/ondemand/field.h for westmere: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -61551,7 +94186,7 @@ struct simdjson_result : public icelake::imp /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { /** @@ -61578,12 +94213,39 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; + /** + * Get the unprocessed key as a string_view. This includes the quotes and may include + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). + */ + simdjson_inline std::string_view key_raw_json_token() const noexcept; + /** + * Get the key as a string_view. This does not include the quotes and + * the string is unprocessed key so it may contain escape characters + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. + */ + simdjson_inline std::string_view escaped_key() const noexcept; /** * Get the field value. */ @@ -61602,29 +94264,33 @@ class field : public std::pair { }; } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public icelake::implementation_simdjson_result_base { +struct simdjson_result : public westmere::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(icelake::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(westmere::ondemand::field &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; - simdjson_inline simdjson_result key() noexcept; - simdjson_inline simdjson_result value() noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result key_raw_json_token() noexcept; + simdjson_inline simdjson_result escaped_key() noexcept; + simdjson_inline simdjson_result value() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H -/* end file simdjson/generic/ondemand/field.h for icelake */ -/* including simdjson/generic/ondemand/object.h for icelake: #include "simdjson/generic/ondemand/object.h" */ -/* begin file simdjson/generic/ondemand/object.h for icelake */ +/* end file simdjson/generic/ondemand/field.h for westmere */ +/* including simdjson/generic/ondemand/object.h for westmere: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -61635,7 +94301,7 @@ struct simdjson_result : public icelake::implementatio /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { /** @@ -61683,6 +94349,8 @@ class object { * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() * is an error. * + * If you expect to have keys with escape characters, please review our documentation. + * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ @@ -61704,7 +94372,7 @@ class object { * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field was not there when they are not in order). * * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. @@ -61720,6 +94388,8 @@ class object { * You are expected to access keys only once. You should access the value corresponding to a key * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. * + * If you expect to have keys with escape characters, please review our documentation. + * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ @@ -61767,11 +94437,24 @@ class object { */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -61833,27 +94516,29 @@ class object { }; } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public icelake::implementation_simdjson_result_base { +struct simdjson_result : public westmere::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(icelake::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(westmere::ondemand::object &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; - simdjson_inline simdjson_result begin() noexcept; - simdjson_inline simdjson_result end() noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; @@ -61864,9 +94549,9 @@ struct simdjson_result : public icelake::implementati } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H -/* end file simdjson/generic/ondemand/object.h for icelake */ -/* including simdjson/generic/ondemand/object_iterator.h for icelake: #include "simdjson/generic/ondemand/object_iterator.h" */ -/* begin file simdjson/generic/ondemand/object_iterator.h for icelake */ +/* end file simdjson/generic/ondemand/object.h for westmere */ +/* including simdjson/generic/ondemand/object_iterator.h for westmere: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -61877,7 +94562,7 @@ struct simdjson_result : public icelake::implementati /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { class object_iterator { @@ -61918,15 +94603,15 @@ class object_iterator { }; } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public icelake::implementation_simdjson_result_base { +struct simdjson_result : public westmere::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(icelake::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(westmere::ondemand::object_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -61935,21 +94620,21 @@ struct simdjson_result : public icelake::imp // // Reads key and value, yielding them to the user. - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. // Assumes it's being compared with the end. true if depth < iter->depth. - simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator==(const simdjson_result &) const noexcept; // Assumes it's being compared with the end. true if depth >= iter->depth. - simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; // Checks for ']' and ',' - simdjson_inline simdjson_result &operator++() noexcept; + simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H -/* end file simdjson/generic/ondemand/object_iterator.h for icelake */ -/* including simdjson/generic/ondemand/serialization.h for icelake: #include "simdjson/generic/ondemand/serialization.h" */ -/* begin file simdjson/generic/ondemand/serialization.h for icelake */ +/* end file simdjson/generic/ondemand/object_iterator.h for westmere */ +/* including simdjson/generic/ondemand/serialization.h for westmere: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -61963,30 +94648,30 @@ namespace simdjson { * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ -inline simdjson_result to_json_string(icelake::ondemand::document& x) noexcept; +inline simdjson_result to_json_string(westmere::ondemand::document& x) noexcept; /** * Create a string-view instance out of a value instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. The value must * not have been accessed previously. It does not * validate the content. */ -inline simdjson_result to_json_string(icelake::ondemand::value& x) noexcept; +inline simdjson_result to_json_string(westmere::ondemand::value& x) noexcept; /** * Create a string-view instance out of an object instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ -inline simdjson_result to_json_string(icelake::ondemand::object& x) noexcept; +inline simdjson_result to_json_string(westmere::ondemand::object& x) noexcept; /** * Create a string-view instance out of an array instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ -inline simdjson_result to_json_string(icelake::ondemand::array& x) noexcept; -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(westmere::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); } // namespace simdjson /** @@ -61996,7 +94681,7 @@ inline simdjson_result to_json_string(simdjson_result x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. It does not @@ -62018,9 +94703,9 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. It does not @@ -62030,13 +94715,13 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); #endif -inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document_reference& value); +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document_reference& value); #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); #endif /** * Print JSON to an output stream. It does not @@ -62046,22 +94731,237 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif -}}} // namespace simdjson::icelake::ondemand +}}} // namespace simdjson::westmere::ondemand #endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H -/* end file simdjson/generic/ondemand/serialization.h for icelake */ +/* end file simdjson/generic/ondemand/serialization.h for westmere */ + +// Deserialization for standard types +/* including simdjson/generic/ondemand/std_deserialize.h for westmere: #include "simdjson/generic/ondemand/std_deserialize.h" */ +/* begin file simdjson/generic/ondemand/std_deserialize.h for westmere */ +#if SIMDJSON_SUPPORTS_DESERIALIZATION + +#ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_ONDEMAND_DESERIALIZE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +template +constexpr bool require_custom_serialization = false; + +////////////////////////////// +// Number deserialization +////////////////////////////// + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + using limits = std::numeric_limits; + + uint64_t x; + SIMDJSON_TRY(val.get_uint64().get(x)); + if (x > (limits::max)()) { + return NUMBER_OUT_OF_RANGE; + } + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + double x; + SIMDJSON_TRY(val.get_double().get(x)); + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + using limits = std::numeric_limits; + + int64_t x; + SIMDJSON_TRY(val.get_int64().get(x)); + if (x > (limits::max)() || x < (limits::min)()) { + return NUMBER_OUT_OF_RANGE; + } + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + +/** + * STL containers have several constructors including one that takes a single + * size argument. Thus, some compilers (Visual Studio) will not be able to + * disambiguate between the size and container constructor. Users should + * explicitly specify the type of the container as needed: e.g., + * doc.get>(). + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::value_type; + static_assert( + deserializable, + "The specified type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the container must default constructible."); + + westmere::ondemand::array arr; + SIMDJSON_TRY(val.get_array().get(arr)); + for (auto v : arr) { + if constexpr (concepts::returns_reference) { + if (auto const err = v.get().get(concepts::emplace_one(out)); + err) { + // If an error occurs, the empty element that we just inserted gets + // removed. We're not using a temp variable because if T is a heavy + // type, we want the valid path to be the fast path and the slow path be + // the path that has errors in it. + if constexpr (requires { out.pop_back(); }) { + static_cast(out.pop_back()); + } + return err; + } + } else { + value_type temp; + if (auto const err = v.get().get(temp); err) { + return err; + } + concepts::emplace_one(out, std::move(temp)); + } + } + return SUCCESS; +} + + +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + westmere::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + + + + +/** + * This CPO (Customization Point Object) will help deserialize into + * smart pointers. + * + * If constructing T is nothrow, this conversion should be nothrow as well since + * we return MEMALLOC if we're not able to allocate memory instead of throwing + * the error message. + * + * @tparam T The type inside the smart pointer + * @tparam ValT document/value type + * @param val document/value + * @param out a reference to the smart pointer + * @return status of the conversion + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::element_type, ValT>) { + using element_type = typename std::remove_cvref_t::element_type; + + // For better error messages, don't use these as constraints on + // the tag_invoke CPO. + static_assert( + deserializable, + "The specified type inside the unique_ptr must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the unique_ptr must default constructible."); + + auto ptr = new (std::nothrow) element_type(); + if (ptr == nullptr) { + return MEMALLOC; + } + SIMDJSON_TRY(val.template get(*ptr)); + out.reset(ptr); + return SUCCESS; +} + +/** + * This CPO (Customization Point Object) will help deserialize into optional types. + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { + using value_type = typename std::remove_cvref_t::value_type; + + static_assert( + deserializable, + "The specified type inside the unique_ptr must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the unique_ptr must default constructible."); + + if (!out) { + out.emplace(); + } + SIMDJSON_TRY(val.template get(out.value())); + return SUCCESS; +} + +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_DESERIALIZE_H +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +/* end file simdjson/generic/ondemand/std_deserialize.h for westmere */ // Inline definitions -/* including simdjson/generic/ondemand/array-inl.h for icelake: #include "simdjson/generic/ondemand/array-inl.h" */ -/* begin file simdjson/generic/ondemand/array-inl.h for icelake */ +/* including simdjson/generic/ondemand/array-inl.h for westmere: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/jsonpathutil.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ @@ -62071,7 +94971,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result array::start(value_iterator &iter) noexcept { - // We don't need to know if the array is empty to start iteration, but we do want to know if there - // is an error--thus `simdjson_unused`. - simdjson_unused bool has_value; - SIMDJSON_TRY( iter.start_array().get(has_value) ); - return array(iter); +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. + iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline simdjson_result array::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for westmere */ +/* including simdjson/generic/ondemand/array_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. + if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::array_iterator &&value +) noexcept + : westmere::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : westmere::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for westmere */ +/* including simdjson/generic/ondemand/value-inl.h for westmere: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result value::get_object() noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +template +simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { + return iter.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} + +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + + +template<> simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } +template<> simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } +template<> simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } +template<> simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } +template<> simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } +template<> simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } +template<> simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } +template<> simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } +template<> simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline value::operator T() noexcept(false) { + return get(); +} +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); } -simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { - simdjson_unused bool has_value; - SIMDJSON_TRY( iter.start_root_array().get(has_value) ); - return array(iter); +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); } -simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { - bool has_value; - SIMDJSON_TRY(iter.started_array().get(has_value)); - return array(iter); + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); } -simdjson_inline simdjson_result array::begin() noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - return array_iterator(iter); +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; } -simdjson_inline simdjson_result array::end() noexcept { - return array_iterator(iter); +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; } -simdjson_inline error_code array::consume() noexcept { - auto error = iter.json_iter().skip_child(iter.depth()-1); - if(error) { iter.abandon(); } - return error; + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); } -simdjson_inline simdjson_result array::raw_json() noexcept { - const uint8_t * starting_point{iter.peek_start()}; - auto error = consume(); +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); if(error) { return error; } - // After 'consume()', we could be left pointing just beyond the document, but that - // is ok because we are not going to dereference the final pointer position, we just - // use it to compute the length in bytes. - const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); + return ! ((this_type == json_type::array) || (this_type == json_type::object)); } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline simdjson_result array::count_elements() & noexcept { - size_t count{0}; - // Important: we do not consume any of the values. - for(simdjson_unused auto v : *this) { count++; } - // The above loop will always succeed, but we want to report errors. - if(iter.error()) { return iter.error(); } - // We need to move back at the start because we expect users to iterate through - // the array after counting the number of elements. - iter.reset_array(); - return count; +simdjson_inline simdjson_result value::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); } -SIMDJSON_POP_DISABLE_WARNINGS -simdjson_inline simdjson_result array::is_empty() & noexcept { - bool is_not_empty; - auto error = iter.reset_array().get(is_not_empty); - if(error) { return error; } - return !is_not_empty; + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); } -inline simdjson_result array::reset() & noexcept { - return iter.reset_array(); +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); } -inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { - if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } - json_pointer = json_pointer.substr(1); - // - means "the append position" or "the element after the end of the array" - // We don't support this, because we're returning a real element, not a position. - if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} - // Read the array index - size_t array_index = 0; - size_t i; - for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { - uint8_t digit = uint8_t(json_pointer[i] - '0'); - // Check for non-digit in array index. If it's there, we're trying to get a field in an object - if (digit > 9) { return INCORRECT_TYPE; } - array_index = array_index*10 + digit; +simdjson_inline simdjson_result value::raw_json() noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: { + ondemand::array array; + SIMDJSON_TRY(get_array().get(array)); + return array.raw_json(); + } + case json_type::object: { + ondemand::object object; + SIMDJSON_TRY(get_object().get(object)); + return object.raw_json(); + } + default: + return raw_json_token(); } +} - // 0 followed by other digits is invalid - if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} - // Empty string is invalid; so is a "/" with no digits before it - if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" - // Get the child - auto child = at(array_index); - // If there is an error, it ends here - if(child.error()) { - return child; +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + return false; } + return true; +} - // If there is a /, we're not done yet, call recursively. - if (i < json_pointer.length()) { - child = child.at_pointer(json_pointer.substr(i)); +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } + return INVALID_JSON_POINTER; } - return child; } -simdjson_inline simdjson_result array::at(size_t index) noexcept { - size_t i = 0; - for (auto value : *this) { - if (i == index) { return value; } - i++; +simdjson_inline simdjson_result value::at_path(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; } - return INDEX_OUT_OF_BOUNDS; } } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - icelake::ondemand::array &&value -) noexcept - : implementation_simdjson_result_base( - std::forward(value) +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) ) { } -simdjson_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error -) noexcept - : implementation_simdjson_result_base(error) +) noexcept : + implementation_simdjson_result_base(error) { } - -simdjson_inline simdjson_result simdjson_result::begin() noexcept { +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { if (error()) { return error(); } - return first.end(); + return {}; } -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { if (error()) { return error(); } - return first.count_elements(); + return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { if (error()) { return error(); } - return first.is_empty(); + return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { if (error()) { return error(); } - return first.at(index); + return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { if (error()) { return error(); } - return first.at_pointer(json_pointer); + return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { if (error()) { return error(); } - return first.raw_json(); + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; } -} // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H -/* end file simdjson/generic/ondemand/array-inl.h for icelake */ -/* including simdjson/generic/ondemand/array_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/array_iterator-inl.h for icelake */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +template<> simdjson_inline error_code simdjson_result::get(westmere::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} -namespace simdjson { -namespace icelake { -namespace ondemand { +template simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} -simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept - : iter{_iter} -{} +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} -simdjson_inline simdjson_result array_iterator::operator*() noexcept { - if (iter.error()) { iter.abandon(); return iter.error(); } - return value(iter.child()); +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); } -simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { - return !(*this != other); +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); } -simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { - return iter.is_open(); +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); } -simdjson_inline array_iterator &array_iterator::operator++() noexcept { - error_code error; - // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. - // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. - if (( error = iter.error() )) { return *this; } - if (( error = iter.skip_child() )) { return *this; } - if (( error = iter.has_next_element().error() )) { return *this; } - return *this; +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); } +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator westmere::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif -} // namespace ondemand -} // namespace icelake -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result( - icelake::ondemand::array_iterator &&value -) noexcept - : icelake::implementation_simdjson_result_base(std::forward(value)) -{ - first.iter.assert_is_valid(); +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); } -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : icelake::implementation_simdjson_result_base({}, error) -{ + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); } -simdjson_inline simdjson_result simdjson_result::operator*() noexcept { +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } - return *first; + return first.current_location(); } -simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return !error(); } - return first == other.first; + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); } -simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return error(); } - return first != other.first; + +simdjson_inline simdjson_result simdjson_result::at_pointer( + std::string_view json_pointer) noexcept { + if (error()) { + return error(); + } + return first.at_pointer(json_pointer); } -simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { - // Clear the error if there is one, so we don't yield it twice - if (error()) { second = SUCCESS; return *this; } - ++(first); - return *this; + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); } } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/array_iterator-inl.h for icelake */ -/* including simdjson/generic/ondemand/document-inl.h for icelake: #include "simdjson/generic/ondemand/document-inl.h" */ -/* begin file simdjson/generic/ondemand/document-inl.h for icelake */ +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for westmere */ +/* including simdjson/generic/ondemand/document-inl.h for westmere: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept @@ -62535,22 +96001,29 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } - -template simdjson_inline error_code document::get(T &out) & noexcept { - return get().get(out); -} -template simdjson_inline error_code document::get(T &out) && noexcept { - return std::forward(*this).get().get(out); -} +template<> simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } +template<> simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } +template<> simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } +template<> simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } +template<> simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } +template<> simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } +template<> simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } +template<> simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } +template<> simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } + +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } #if SIMDJSON_EXCEPTIONS +template +simdjson_deprecated simdjson_inline document::operator T() && noexcept(false) { return get(); } +template +simdjson_inline document::operator T() & noexcept(false) { return get(); } simdjson_inline document::operator array() & noexcept(false) { return get_array(); } simdjson_inline document::operator object() & noexcept(false) { return get_object(); } simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } @@ -62607,7 +96080,14 @@ simdjson_inline simdjson_result document::operator[](const char *key) & n } simdjson_inline error_code document::consume() noexcept { - auto error = iter.skip_child(0); + bool scalar = false; + auto error = is_scalar().get(scalar); + if(error) { return error; } + if(scalar) { + iter.return_current_and_advance(); + return SUCCESS; + } + error = iter.skip_child(0); if(error) { iter.abandon(); } return error; } @@ -62629,12 +96109,21 @@ simdjson_inline simdjson_result document::type() noexcept { } simdjson_inline simdjson_result document::is_scalar() noexcept { + // For more speed, we could do: + // return iter.is_single_token(); json_type this_type; auto error = type().get(this_type); if(error) { return error; } return ! ((this_type == json_type::array) || (this_type == json_type::object)); } +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + simdjson_inline bool document::is_negative() noexcept { return get_root_value_iterator().is_root_negative(); } @@ -62654,7 +96143,7 @@ simdjson_inline simdjson_result document::get_number() noexcept { simdjson_inline simdjson_result document::raw_json_token() noexcept { auto _iter = get_root_value_iterator(); - return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); } simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { @@ -62675,275 +96164,305 @@ simdjson_inline simdjson_result document::at_pointer(std::string_view jso } } +simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - icelake::ondemand::document &&value +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::document &&value ) noexcept : - implementation_simdjson_result_base( - std::forward(value) + implementation_simdjson_result_base( + std::forward(value) ) { } -simdjson_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : - implementation_simdjson_result_base( + implementation_simdjson_result_base( error ) { } -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_inline error_code simdjson_result::rewind() noexcept { +simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { +simdjson_inline simdjson_result simdjson_result::get() & noexcept { if (error()) { return error(); } return first.get(); } template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } - return std::forward(first).get(); + return std::forward(first).get(); } template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { if (error()) { return error(); } return first.get(out); } template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { if (error()) { return error(); } - return std::forward(first).get(out); + return std::forward(first).get(out); } -template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } - return std::forward(first); + return std::forward(first); } -template<> simdjson_inline error_code simdjson_result::get(icelake::ondemand::document &out) & noexcept = delete; -template<> simdjson_inline error_code simdjson_result::get(icelake::ondemand::document &out) && noexcept { +template<> simdjson_inline error_code simdjson_result::get(westmere::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code simdjson_result::get(westmere::ondemand::document &out) && noexcept { if (error()) { return error(); } - out = std::forward(first); + out = std::forward(first); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::type() noexcept { +simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} -simdjson_inline bool simdjson_result::is_negative() noexcept { +simdjson_inline bool simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS -simdjson_inline simdjson_result::operator icelake::ondemand::array() & noexcept(false) { +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator icelake::ondemand::object() & noexcept(false) { +simdjson_inline simdjson_result::operator westmere::ondemand::array() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { +simdjson_inline simdjson_result::operator westmere::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator double() noexcept(false) { +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { +simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator icelake::ondemand::raw_json_string() noexcept(false) { +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator bool() noexcept(false) { +simdjson_inline simdjson_result::operator westmere::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator icelake::ondemand::value() noexcept(false) { +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } -simdjson_inline bool simdjson_result::at_end() const noexcept { +simdjson_inline bool simdjson_result::at_end() const noexcept { if (error()) { return error(); } return first.at_end(); } -simdjson_inline int32_t simdjson_result::current_depth() const noexcept { +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { if (error()) { return error(); } return first.current_depth(); } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} } // namespace simdjson namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} @@ -62979,15 +96498,25 @@ simdjson_inline simdjson_result document_reference::get_raw_jso simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } - +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } #if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } -simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } #endif @@ -63004,6 +96533,7 @@ simdjson_inline simdjson_result document_reference::find_field_unordered( simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } @@ -63012,214 +96542,264 @@ simdjson_inline simdjson_result document_reference::get_number_type simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} simdjson_inline document_reference::operator document&() const noexcept { return *doc; } } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::document_reference value, error_code error) - noexcept : implementation_simdjson_result_base(std::forward(value), error) {} +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_inline error_code simdjson_result::rewind() noexcept { +simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } -simdjson_inline simdjson_result simdjson_result::type() noexcept { +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } -simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +template <> +simdjson_inline error_code simdjson_result::get(westmere::ondemand::document_reference &out) & noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +template <> +simdjson_inline error_code simdjson_result::get(westmere::ondemand::document_reference &out) && noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS -simdjson_inline simdjson_result::operator icelake::ondemand::array() & noexcept(false) { +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator westmere::ondemand::array() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator icelake::ondemand::object() & noexcept(false) { +simdjson_inline simdjson_result::operator westmere::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator double() noexcept(false) { +simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator icelake::ondemand::raw_json_string() noexcept(false) { +simdjson_inline simdjson_result::operator westmere::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator bool() noexcept(false) { +simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator icelake::ondemand::value() noexcept(false) { +simdjson_inline simdjson_result::operator westmere::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H -/* end file simdjson/generic/ondemand/document-inl.h for icelake */ -/* including simdjson/generic/ondemand/document_stream-inl.h for icelake: #include "simdjson/generic/ondemand/document_stream-inl.h" */ -/* begin file simdjson/generic/ondemand/document_stream-inl.h for icelake */ +/* end file simdjson/generic/ondemand/document-inl.h for westmere */ +/* including simdjson/generic/ondemand/document_stream-inl.h for westmere: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -63234,7 +96814,7 @@ simdjson_inline simdjson_result simdjson_result namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED @@ -63374,7 +96954,6 @@ simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bo } simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { - //if(stream->error) { return stream->error; } return simdjson_result(stream->doc, stream->error); } @@ -63564,9 +97143,18 @@ simdjson_inline std::string_view document_stream::iterator::source() const noexc depth--; break; default: // Scalar value document - // TODO: Remove any trailing whitespaces + // TODO: We could remove trailing whitespaces // This returns a string spanning from start of value to the beginning of the next document (excluded) - return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } } cur_struct_index++; } @@ -63621,22 +97209,22 @@ inline void document_stream::start_stage1_thread() noexcept { #endif // SIMDJSON_THREADS_ENABLED } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : - implementation_simdjson_result_base(error) + implementation_simdjson_result_base(error) { } -simdjson_inline simdjson_result::simdjson_result( - icelake::ondemand::document_stream &&value +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::document_stream &&value ) noexcept : - implementation_simdjson_result_base( - std::forward(value) + implementation_simdjson_result_base( + std::forward(value) ) { } @@ -63644,9 +97232,9 @@ simdjson_inline simdjson_result::simdjson_re } #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H -/* end file simdjson/generic/ondemand/document_stream-inl.h for icelake */ -/* including simdjson/generic/ondemand/field-inl.h for icelake: #include "simdjson/generic/ondemand/field-inl.h" */ -/* begin file simdjson/generic/ondemand/field-inl.h for icelake */ +/* end file simdjson/generic/ondemand/document_stream-inl.h for westmere */ +/* including simdjson/generic/ondemand/field-inl.h for westmere: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -63658,10 +97246,10 @@ simdjson_inline simdjson_result::simdjson_re /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { -// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit +// clang 6 does not think the default constructor can be noexcept, so we make it explicit simdjson_inline field::field() noexcept : std::pair() {} simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept @@ -63687,11 +97275,32 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. return first; } + +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +} + +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); +} + simdjson_inline value &field::value() & noexcept { return second; } @@ -63701,35 +97310,53 @@ simdjson_inline value field::value() && noexcept { } } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - icelake::ondemand::field &&value +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::field &&value ) noexcept : - implementation_simdjson_result_base( - std::forward(value) + implementation_simdjson_result_base( + std::forward(value) ) { } -simdjson_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : - implementation_simdjson_result_base(error) + implementation_simdjson_result_base(error) { } -simdjson_inline simdjson_result simdjson_result::key() noexcept { +simdjson_inline simdjson_result simdjson_result::key() noexcept { if (error()) { return error(); } return first.key(); } -simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { + if (error()) { return error(); } + return first.key_raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { + if (error()) { return error(); } + return first.escaped_key(); +} + +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { if (error()) { return error(); } return first.unescaped_key(allow_replacement); } -simdjson_inline simdjson_result simdjson_result::value() noexcept { + +template +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + +simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); } @@ -63737,9 +97364,9 @@ simdjson_inline simdjson_result simdjson_result simdjson_resultimplementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + inline void json_iterator::rewind() noexcept { token.set_position( root_position() ); logger::log_headers(); // We start again @@ -64028,6 +97672,12 @@ simdjson_inline uint32_t json_iterator::peek_length(token_position position) con #endif // SIMDJSON_CHECK_EOF return token.peek_length(position); } +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); +} simdjson_inline token_position json_iterator::last_position() const noexcept { // The following line fails under some compilers... @@ -64073,11 +97723,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -64136,22 +97798,22 @@ simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const ui } } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::json_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/json_iterator-inl.h for icelake */ -/* including simdjson/generic/ondemand/json_type-inl.h for icelake: #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* begin file simdjson/generic/ondemand/json_type-inl.h for icelake */ +/* end file simdjson/generic/ondemand/json_iterator-inl.h for westmere */ +/* including simdjson/generic/ondemand/json_type-inl.h for westmere: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -64162,7 +97824,7 @@ simdjson_inline simdjson_result::simdjson_resu /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { @@ -64202,7 +97864,6 @@ simdjson_inline number::operator uint64_t() const noexcept { return get_uint64(); } - simdjson_inline bool number::is_int64() const noexcept { return get_number_type() == number_type::signed_integer; } @@ -64257,22 +97918,22 @@ simdjson_inline void number::skip_double() noexcept { } } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::json_type &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H -/* end file simdjson/generic/ondemand/json_type-inl.h for icelake */ -/* including simdjson/generic/ondemand/logger-inl.h for icelake: #include "simdjson/generic/ondemand/logger-inl.h" */ -/* begin file simdjson/generic/ondemand/logger-inl.h for icelake */ +/* end file simdjson/generic/ondemand/json_type-inl.h for westmere */ +/* including simdjson/generic/ondemand/logger-inl.h for westmere: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -64287,7 +97948,7 @@ simdjson_inline simdjson_result::simdjson_result(e #include namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { namespace logger { @@ -64494,13 +98155,13 @@ inline void log_line(const json_iterator &iter, token_position index, depth_t de } // namespace logger } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H -/* end file simdjson/generic/ondemand/logger-inl.h for icelake */ -/* including simdjson/generic/ondemand/object-inl.h for icelake: #include "simdjson/generic/ondemand/object-inl.h" */ -/* begin file simdjson/generic/ondemand/object-inl.h for icelake */ +/* end file simdjson/generic/ondemand/logger-inl.h for westmere */ +/* including simdjson/generic/ondemand/object-inl.h for westmere: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -64515,7 +98176,7 @@ inline void log_line(const json_iterator &iter, token_position index, depth_t de /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { @@ -64667,6 +98328,14 @@ inline simdjson_result object::at_pointer(std::string_view json_pointer) return child; } +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); +} + simdjson_inline simdjson_result object::count_fields() & noexcept { size_t count{0}; // Important: we do not consume any of the values. @@ -64691,79 +98360,87 @@ simdjson_inline simdjson_result object::reset() & noexcept { } } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::object &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -simdjson_inline simdjson_result simdjson_result::begin() noexcept { +simdjson_inline simdjson_result simdjson_result::begin() noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() noexcept { +simdjson_inline simdjson_result simdjson_result::end() noexcept { if (error()) { return error(); } return first.end(); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { if (error()) { return error(); } - return std::forward(first).find_field_unordered(key); + return std::forward(first).find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { if (error()) { return error(); } - return std::forward(first)[key]; + return std::forward(first)[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { if (error()) { return error(); } - return std::forward(first).find_field(key); + return std::forward(first).find_field(key); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } -inline simdjson_result simdjson_result::reset() noexcept { +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +inline simdjson_result simdjson_result::reset() noexcept { if (error()) { return error(); } return first.reset(); } -inline simdjson_result simdjson_result::is_empty() noexcept { +inline simdjson_result simdjson_result::is_empty() noexcept { if (error()) { return error(); } return first.is_empty(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H -/* end file simdjson/generic/ondemand/object-inl.h for icelake */ -/* including simdjson/generic/ondemand/object_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/object_iterator-inl.h for icelake */ +/* end file simdjson/generic/ondemand/object-inl.h for westmere */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -64775,7 +98452,7 @@ simdjson_inline simdjson_result simdjson_result::simdjson_result( - icelake::ondemand::object_iterator &&value +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::object_iterator &&value ) noexcept - : implementation_simdjson_result_base(std::forward(value)) + : implementation_simdjson_result_base(std::forward(value)) { first.iter.assert_is_valid(); } -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base({}, error) +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) { } -simdjson_inline simdjson_result simdjson_result::operator*() noexcept { +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { if (error()) { return error(); } return *first; } // If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return !error(); } return first == other.first; } // If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return error(); } return first != other.first; } // Checks for ']' and ',' -simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { // Clear the error if there is one, so we don't yield it twice if (error()) { second = SUCCESS; return *this; } ++first; @@ -64902,9 +98579,9 @@ simdjson_inline simdjson_result &simdjson_re } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/object_iterator-inl.h for icelake */ -/* including simdjson/generic/ondemand/parser-inl.h for icelake: #include "simdjson/generic/ondemand/parser-inl.h" */ -/* begin file simdjson/generic/ondemand/parser-inl.h for icelake */ +/* end file simdjson/generic/ondemand/object_iterator-inl.h for westmere */ +/* including simdjson/generic/ondemand/parser-inl.h for westmere: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -64921,7 +98598,7 @@ simdjson_inline simdjson_result &simdjson_re /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { simdjson_inline parser::parser(size_t max_capacity) noexcept @@ -64949,6 +98626,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } @@ -64965,6 +98647,27 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p return document::start({ reinterpret_cast(json.data()), this }); } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } @@ -65036,13 +98739,13 @@ inline simdjson_result parser::iterate_many(const padded_string return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } -simdjson_inline size_t parser::capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } -simdjson_inline size_t parser::max_capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } -simdjson_inline size_t parser::max_depth() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { return _max_depth; } @@ -65071,22 +98774,22 @@ simdjson_inline simdjson_warn_unused simdjson_result parser::u } } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::parser &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H -/* end file simdjson/generic/ondemand/parser-inl.h for icelake */ -/* including simdjson/generic/ondemand/raw_json_string-inl.h for icelake: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ -/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for icelake */ +/* end file simdjson/generic/ondemand/parser-inl.h for westmere */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for westmere: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -65099,7 +98802,7 @@ simdjson_inline simdjson_result::simdjson_result(erro namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} @@ -65109,36 +98812,39 @@ simdjson_inline const char * raw_json_string::raw() const noexcept { return rein simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;pos < target.size() && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;pos < target.size();pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;target[pos] && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;target[pos];pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } @@ -65150,7 +98856,7 @@ simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string } simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and + // Assumptions: does not contain unescaped quote characters("), and // the raw content is quote terminated within a valid JSON string. if(target.size() <= SIMDJSON_PADDING) { return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); @@ -65265,34 +98971,34 @@ simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, cons } } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::raw_json_string &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -simdjson_inline simdjson_result simdjson_result::raw() const noexcept { +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { if (error()) { return error(); } return first.raw(); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(icelake::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(westmere::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { if (error()) { return error(); } return first.unescape(iter, allow_replacement); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(icelake::ondemand::json_iterator &iter) const noexcept { +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(westmere::ondemand::json_iterator &iter) const noexcept { if (error()) { return error(); } return first.unescape_wobbly(iter); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H -/* end file simdjson/generic/ondemand/raw_json_string-inl.h for icelake */ -/* including simdjson/generic/ondemand/serialization-inl.h for icelake: #include "simdjson/generic/ondemand/serialization-inl.h" */ -/* begin file simdjson/generic/ondemand/serialization-inl.h for icelake */ +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for westmere */ +/* including simdjson/generic/ondemand/serialization-inl.h for westmere: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -65319,43 +99025,43 @@ inline std::string_view trim(const std::string_view str) noexcept { } -inline simdjson_result to_json_string(icelake::ondemand::document& x) noexcept { +inline simdjson_result to_json_string(westmere::ondemand::document& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } -inline simdjson_result to_json_string(icelake::ondemand::document_reference& x) noexcept { +inline simdjson_result to_json_string(westmere::ondemand::document_reference& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } -inline simdjson_result to_json_string(icelake::ondemand::value& x) noexcept { +inline simdjson_result to_json_string(westmere::ondemand::value& x) noexcept { /** * If we somehow receive a value that has already been consumed, * then the following code could be in trouble. E.g., we create * an array as needed, but if an array was already created, then * it could be bad. */ - using namespace icelake::ondemand; - icelake::ondemand::json_type t; + using namespace westmere::ondemand; + westmere::ondemand::json_type t; auto error = x.type().get(t); if(error != SUCCESS) { return error; } switch (t) { case json_type::array: { - icelake::ondemand::array array; + westmere::ondemand::array array; error = x.get_array().get(array); if(error) { return error; } return to_json_string(array); } case json_type::object: { - icelake::ondemand::object object; + westmere::ondemand::object object; error = x.get_object().get(object); if(error) { return error; } return to_json_string(object); @@ -65365,50 +99071,50 @@ inline simdjson_result to_json_string(icelake::ondemand::value } } -inline simdjson_result to_json_string(icelake::ondemand::object& x) noexcept { +inline simdjson_result to_json_string(westmere::ondemand::object& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } -inline simdjson_result to_json_string(icelake::ondemand::array& x) noexcept { +inline simdjson_result to_json_string(westmere::ondemand::array& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } } // namespace simdjson -namespace simdjson { namespace icelake { namespace ondemand { +namespace simdjson { namespace westmere { namespace ondemand { #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::value x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::value x) { std::string_view v; auto error = simdjson::to_json_string(x).get(v); if(error == simdjson::SUCCESS) { @@ -65417,12 +99123,12 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand:: throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else -inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::value x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::value x) { std::string_view v; auto error = simdjson::to_json_string(x).get(v); if(error == simdjson::SUCCESS) { @@ -65434,7 +99140,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand:: #endif #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::array value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::array value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -65443,12 +99149,12 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand:: throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else -inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::array value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::array value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -65460,7 +99166,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand:: #endif #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document& value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -65469,7 +99175,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand:: throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document_reference& value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document_reference& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -65478,16 +99184,16 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand:: throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else -inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document& value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -65499,7 +99205,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand:: #endif #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::object value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::object value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -65508,12 +99214,12 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand:: throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else -inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::object value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::object value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -65523,12 +99229,12 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand:: } } #endif -}}} // namespace simdjson::icelake::ondemand +}}} // namespace simdjson::westmere::ondemand #endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H -/* end file simdjson/generic/ondemand/serialization-inl.h for icelake */ -/* including simdjson/generic/ondemand/token_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/token_iterator-inl.h for icelake */ +/* end file simdjson/generic/ondemand/serialization-inl.h for westmere */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -65539,7 +99245,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand:: /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { +namespace westmere { namespace ondemand { simdjson_inline token_iterator::token_iterator( @@ -65568,6 +99274,11 @@ simdjson_inline uint32_t token_iterator::peek_length(token_position position) co return *(position+1) - *position; } +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? + *(position+1) - *(position) + : *(position+2) - *(position); +} simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { return &buf[*(_position+delta)]; } @@ -65605,498 +99316,22 @@ simdjson_inline bool token_iterator::operator<=(const token_iterator &other) con } } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::token_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/token_iterator-inl.h for icelake */ -/* including simdjson/generic/ondemand/value-inl.h for icelake: #include "simdjson/generic/ondemand/value-inl.h" */ -/* begin file simdjson/generic/ondemand/value-inl.h for icelake */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace icelake { -namespace ondemand { - -simdjson_inline value::value(const value_iterator &_iter) noexcept - : iter{_iter} -{ -} -simdjson_inline value value::start(const value_iterator &iter) noexcept { - return iter; -} -simdjson_inline value value::resume(const value_iterator &iter) noexcept { - return iter; -} - -simdjson_inline simdjson_result value::get_array() noexcept { - return array::start(iter); -} -simdjson_inline simdjson_result value::get_object() noexcept { - return object::start(iter); -} -simdjson_inline simdjson_result value::start_or_resume_object() noexcept { - if (iter.at_start()) { - return get_object(); - } else { - return object::resume(iter); - } -} - -simdjson_inline simdjson_result value::get_raw_json_string() noexcept { - return iter.get_raw_json_string(); -} -simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { - return iter.get_string(allow_replacement); -} -template -simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { - return iter.get_string(receiver, allow_replacement); -} -simdjson_inline simdjson_result value::get_wobbly_string() noexcept { - return iter.get_wobbly_string(); -} -simdjson_inline simdjson_result value::get_double() noexcept { - return iter.get_double(); -} -simdjson_inline simdjson_result value::get_double_in_string() noexcept { - return iter.get_double_in_string(); -} -simdjson_inline simdjson_result value::get_uint64() noexcept { - return iter.get_uint64(); -} -simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { - return iter.get_uint64_in_string(); -} -simdjson_inline simdjson_result value::get_int64() noexcept { - return iter.get_int64(); -} -simdjson_inline simdjson_result value::get_int64_in_string() noexcept { - return iter.get_int64_in_string(); -} -simdjson_inline simdjson_result value::get_bool() noexcept { - return iter.get_bool(); -} -simdjson_inline simdjson_result value::is_null() noexcept { - return iter.is_null(); -} -template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } - -template simdjson_inline error_code value::get(T &out) noexcept { - return get().get(out); -} - -#if SIMDJSON_EXCEPTIONS -simdjson_inline value::operator array() noexcept(false) { - return get_array(); -} -simdjson_inline value::operator object() noexcept(false) { - return get_object(); -} -simdjson_inline value::operator uint64_t() noexcept(false) { - return get_uint64(); -} -simdjson_inline value::operator int64_t() noexcept(false) { - return get_int64(); -} -simdjson_inline value::operator double() noexcept(false) { - return get_double(); -} -simdjson_inline value::operator std::string_view() noexcept(false) { - return get_string(false); -} -simdjson_inline value::operator raw_json_string() noexcept(false) { - return get_raw_json_string(); -} -simdjson_inline value::operator bool() noexcept(false) { - return get_bool(); -} -#endif - -simdjson_inline simdjson_result value::begin() & noexcept { - return get_array().begin(); -} -simdjson_inline simdjson_result value::end() & noexcept { - return {}; -} -simdjson_inline simdjson_result value::count_elements() & noexcept { - simdjson_result answer; - auto a = get_array(); - answer = a.count_elements(); - // count_elements leaves you pointing inside the array, at the first element. - // We need to move back so that the user can create a new array (which requires that - // we point at '['). - iter.move_at_start(); - return answer; -} -simdjson_inline simdjson_result value::count_fields() & noexcept { - simdjson_result answer; - auto a = get_object(); - answer = a.count_fields(); - iter.move_at_start(); - return answer; -} -simdjson_inline simdjson_result value::at(size_t index) noexcept { - auto a = get_array(); - return a.at(index); -} - -simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { - return start_or_resume_object().find_field(key); -} -simdjson_inline simdjson_result value::find_field(const char *key) noexcept { - return start_or_resume_object().find_field(key); -} - -simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { - return start_or_resume_object().find_field_unordered(key); -} -simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { - return start_or_resume_object().find_field_unordered(key); -} - -simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { - return start_or_resume_object()[key]; -} -simdjson_inline simdjson_result value::operator[](const char *key) noexcept { - return start_or_resume_object()[key]; -} - -simdjson_inline simdjson_result value::type() noexcept { - return iter.type(); -} - -simdjson_inline simdjson_result value::is_scalar() noexcept { - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return ! ((this_type == json_type::array) || (this_type == json_type::object)); -} - -simdjson_inline bool value::is_negative() noexcept { - return iter.is_negative(); -} - -simdjson_inline simdjson_result value::is_integer() noexcept { - return iter.is_integer(); -} -simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { - return iter.get_number_type(); -} -simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { - return iter.get_number(); -} - -simdjson_inline std::string_view value::raw_json_token() noexcept { - return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); -} - -simdjson_inline simdjson_result value::raw_json() noexcept { - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: { - ondemand::array array; - SIMDJSON_TRY(get_array().get(array)); - return array.raw_json(); - } - case json_type::object: { - ondemand::object object; - SIMDJSON_TRY(get_object().get(object)); - return object.raw_json(); - } - default: - return raw_json_token(); - } -} - -simdjson_inline simdjson_result value::current_location() noexcept { - return iter.json_iter().current_location(); -} - -simdjson_inline int32_t value::current_depth() const noexcept{ - return iter.json_iter().depth(); -} - -simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: - return (*this).get_array().at_pointer(json_pointer); - case json_type::object: - return (*this).get_object().at_pointer(json_pointer); - default: - return INVALID_JSON_POINTER; - } -} - -} // namespace ondemand -} // namespace icelake -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result( - icelake::ondemand::value &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ -} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { - if (error()) { return error(); } - return first.count_elements(); -} -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} -simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { - if (error()) { return error(); } - return first.at(index); -} -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_inline simdjson_result simdjson_result::end() & noexcept { - if (error()) { return error(); } - return {}; -} - -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { - if (error()) { return error(); } - return first.find_field(key); -} - -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} - -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { - if (error()) { return error(); } - return first[key]; -} - -simdjson_inline simdjson_result simdjson_result::get_array() noexcept { - if (error()) { return error(); } - return first.get_array(); -} -simdjson_inline simdjson_result simdjson_result::get_object() noexcept { - if (error()) { return error(); } - return first.get_object(); -} -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return error(); } - return first.get_uint64(); -} -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { - if (error()) { return error(); } - return first.get_uint64_in_string(); -} -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); -} -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { - if (error()) { return error(); } - return first.get_int64_in_string(); -} -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); -} -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { - if (error()) { return error(); } - return first.get_double_in_string(); -} -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(allow_replacement); -} -template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(receiver, allow_replacement); -} -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { - if (error()) { return error(); } - return first.get_wobbly_string(); -} -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); -} -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); -} -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { - if (error()) { return error(); } - return first.is_null(); -} - -template simdjson_inline simdjson_result simdjson_result::get() noexcept { - if (error()) { return error(); } - return first.get(); -} -template simdjson_inline error_code simdjson_result::get(T &out) noexcept { - if (error()) { return error(); } - return first.get(out); -} - -template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { - if (error()) { return error(); } - return std::move(first); -} -template<> simdjson_inline error_code simdjson_result::get(icelake::ondemand::value &out) noexcept { - if (error()) { return error(); } - out = first; - return SUCCESS; -} - -simdjson_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); -} -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); -} -simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); -} -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); -} -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); -} -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); -} -#if SIMDJSON_EXCEPTIONS -simdjson_inline simdjson_result::operator icelake::ondemand::array() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator icelake::ondemand::object() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator icelake::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -#endif - -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); -} - -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { - if (error()) { return error(); } - return first.raw_json(); -} - -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); -} - -simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { - if (error()) { return error(); } - return first.current_depth(); -} - -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); -} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H -/* end file simdjson/generic/ondemand/value-inl.h for icelake */ -/* including simdjson/generic/ondemand/value_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/value_iterator-inl.h for icelake */ +/* end file simdjson/generic/ondemand/token_iterator-inl.h for westmere */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for westmere */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -66105,13 +99340,13 @@ simdjson_inline simdjson_result simdjson_result value_iterator::find_ } else if (!is_open()) { #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. - // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif @@ -66348,7 +99583,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_ #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. - // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif @@ -66690,7 +99925,7 @@ simdjson_inline simdjson_result value_iterator::get_number() noexcept { } simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("is_root_integer"); uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -66705,8 +99940,8 @@ simdjson_inline simdjson_result value_iterator::is_root_integer(bool check return answer; } -simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { - auto max_len = peek_start_length(); +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); auto json = peek_root_scalar("number"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest @@ -66714,7 +99949,12 @@ simdjson_inline simdjson_result value_iterator::get_root_n uint8_t tmpbuf[1074+8+1+1]; tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); return NUMBER_ERROR; } auto answer = numberparsing::get_number_type(tmpbuf); @@ -66722,15 +99962,21 @@ simdjson_inline simdjson_result value_iterator::get_root_n return answer; } simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("number"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. uint8_t tmpbuf[1074+8+1+1]; tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); return NUMBER_ERROR; } number num; @@ -66762,7 +100008,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iter return raw_json_string(json+1); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("uint64"); uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -66778,7 +100024,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::g return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("uint64"); uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -66794,7 +100040,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::g return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -66811,7 +100057,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::ge return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -66828,7 +100074,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::ge return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest @@ -66848,7 +100094,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest @@ -66866,27 +100112,33 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("bool"); + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); if(result) { // we have something that looks like a null. if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("null"); + } else if (json[0] == 'n') { + return incorrect_type_error("Not a null but starts with n"); } return result; } @@ -66952,6 +100204,9 @@ simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { return _json_iter->peek_length(start_position()); } +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); +} simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); @@ -67161,51 +100416,53 @@ simdjson_inline error_code value_iterator::report_error(error_code error, const } } // namespace ondemand -} // namespace icelake +} // namespace westmere } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::value_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/value_iterator-inl.h for icelake */ -/* end file simdjson/generic/ondemand/amalgamated.h for icelake */ -/* including simdjson/icelake/end.h: #include "simdjson/icelake/end.h" */ -/* begin file simdjson/icelake/end.h */ +/* end file simdjson/generic/ondemand/value_iterator-inl.h for westmere */ + + +/* end file simdjson/generic/ondemand/amalgamated.h for westmere */ +/* including simdjson/westmere/end.h: #include "simdjson/westmere/end.h" */ +/* begin file simdjson/westmere/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE SIMDJSON_UNTARGET_REGION #endif -/* undefining SIMDJSON_IMPLEMENTATION from "icelake" */ +/* undefining SIMDJSON_IMPLEMENTATION from "westmere" */ #undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/icelake/end.h */ - -#endif // SIMDJSON_ICELAKE_ONDEMAND_H -/* end file simdjson/icelake/ondemand.h */ -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(ppc64) -/* including simdjson/ppc64/ondemand.h: #include "simdjson/ppc64/ondemand.h" */ -/* begin file simdjson/ppc64/ondemand.h */ -#ifndef SIMDJSON_PPC64_ONDEMAND_H -#define SIMDJSON_PPC64_ONDEMAND_H +/* end file simdjson/westmere/end.h */ -/* including simdjson/ppc64/begin.h: #include "simdjson/ppc64/begin.h" */ -/* begin file simdjson/ppc64/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "ppc64" */ -#define SIMDJSON_IMPLEMENTATION ppc64 -/* including simdjson/ppc64/base.h: #include "simdjson/ppc64/base.h" */ -/* begin file simdjson/ppc64/base.h */ -#ifndef SIMDJSON_PPC64_BASE_H -#define SIMDJSON_PPC64_BASE_H +#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H +/* end file simdjson/westmere/ondemand.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(lsx) +/* including simdjson/lsx/ondemand.h: #include "simdjson/lsx/ondemand.h" */ +/* begin file simdjson/lsx/ondemand.h */ +#ifndef SIMDJSON_LSX_ONDEMAND_H +#define SIMDJSON_LSX_ONDEMAND_H + +/* including simdjson/lsx/begin.h: #include "simdjson/lsx/begin.h" */ +/* begin file simdjson/lsx/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "lsx" */ +#define SIMDJSON_IMPLEMENTATION lsx +/* including simdjson/lsx/base.h: #include "simdjson/lsx/base.h" */ +/* begin file simdjson/lsx/base.h */ +#ifndef SIMDJSON_LSX_BASE_H +#define SIMDJSON_LSX_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #include "simdjson/base.h" */ @@ -67213,9 +100470,9 @@ SIMDJSON_UNTARGET_REGION namespace simdjson { /** - * Implementation for ALTIVEC (PPC64). + * Implementation for LSX. */ -namespace ppc64 { +namespace lsx { class implementation; @@ -67226,48 +100483,41 @@ template struct simd8x64; } // namespace simd } // unnamed namespace -} // namespace ppc64 +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_PPC64_BASE_H -/* end file simdjson/ppc64/base.h */ -/* including simdjson/ppc64/intrinsics.h: #include "simdjson/ppc64/intrinsics.h" */ -/* begin file simdjson/ppc64/intrinsics.h */ -#ifndef SIMDJSON_PPC64_INTRINSICS_H -#define SIMDJSON_PPC64_INTRINSICS_H +#endif // SIMDJSON_LSX_BASE_H +/* end file simdjson/lsx/base.h */ +/* including simdjson/lsx/intrinsics.h: #include "simdjson/lsx/intrinsics.h" */ +/* begin file simdjson/lsx/intrinsics.h */ +#ifndef SIMDJSON_LSX_INTRINSICS_H +#define SIMDJSON_LSX_INTRINSICS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ // This should be the correct header whether // you use visual studio or other compilers. -#include - -// These are defined by altivec.h in GCC toolchain, it is safe to undef them. -#ifdef bool -#undef bool -#endif - -#ifdef vector -#undef vector -#endif +#include -static_assert(sizeof(__vector unsigned char) <= simdjson::SIMDJSON_PADDING, "insufficient padding for ppc64"); +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for LoongArch SX"); -#endif // SIMDJSON_PPC64_INTRINSICS_H -/* end file simdjson/ppc64/intrinsics.h */ -/* including simdjson/ppc64/bitmanipulation.h: #include "simdjson/ppc64/bitmanipulation.h" */ -/* begin file simdjson/ppc64/bitmanipulation.h */ -#ifndef SIMDJSON_PPC64_BITMANIPULATION_H -#define SIMDJSON_PPC64_BITMANIPULATION_H +#endif // SIMDJSON_LSX_INTRINSICS_H +/* end file simdjson/lsx/intrinsics.h */ +/* including simdjson/lsx/bitmanipulation.h: #include "simdjson/lsx/bitmanipulation.h" */ +/* begin file simdjson/lsx/bitmanipulation.h */ +#ifndef SIMDJSON_LSX_BITMANIPULATION_H +#define SIMDJSON_LSX_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmask.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace { // We sometimes call trailing_zero on inputs that are zero, @@ -67279,99 +100529,54 @@ SIMDJSON_NO_SANITIZE_UNDEFINED // See issue https://github.com/simdjson/simdjson/issues/1965 SIMDJSON_NO_SANITIZE_MEMORY simdjson_inline int trailing_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; -#else // SIMDJSON_REGULAR_VISUAL_STUDIO return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO } /* result might be undefined when input_num is zero */ simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return input_num & (input_num - 1); + return input_num & (input_num-1); } /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else return __builtin_clzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO } -#if SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_inline int count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows in this kernel - return __popcnt64(input_num); // Visual Studio wants two underscores -} -#else +/* result might be undefined when input_num is zero */ simdjson_inline int count_ones(uint64_t input_num) { - return __builtin_popcountll(input_num); + return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__m128i(v2u64{input_num, 0})), 0); } -#endif -simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - *result = value1 + value2; - return *result < value1; -#else +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { return __builtin_uaddll_overflow(value1, value2, reinterpret_cast(result)); -#endif } } // unnamed namespace -} // namespace ppc64 +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_PPC64_BITMANIPULATION_H -/* end file simdjson/ppc64/bitmanipulation.h */ -/* including simdjson/ppc64/bitmask.h: #include "simdjson/ppc64/bitmask.h" */ -/* begin file simdjson/ppc64/bitmask.h */ -#ifndef SIMDJSON_PPC64_BITMASK_H -#define SIMDJSON_PPC64_BITMASK_H +#endif // SIMDJSON_LSX_BITMANIPULATION_H +/* end file simdjson/lsx/bitmanipulation.h */ +/* including simdjson/lsx/bitmask.h: #include "simdjson/lsx/bitmask.h" */ +/* begin file simdjson/lsx/bitmask.h */ +#ifndef SIMDJSON_LSX_BITMASK_H +#define SIMDJSON_LSX_BITMASK_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace { // -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is -// encountered. +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. // // For example, prefix_xor(00100100) == 00011100 // simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { - // You can use the version below, however gcc sometimes miscompiles - // vec_pmsum_be, it happens somewhere around between 8 and 9th version. - // The performance boost was not noticeable, falling back to a usual - // implementation. - // __vector unsigned long long all_ones = {~0ull, ~0ull}; - // __vector unsigned long long mask = {bitmask, 0}; - // // Clang and GCC return different values for pmsum for ull so cast it to one. - // // Generally it is not specified by ALTIVEC ISA what is returned by - // // vec_pmsum_be. - // #if defined(__LITTLE_ENDIAN__) - // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); - // #else - // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); - // #endif bitmask ^= bitmask << 1; bitmask ^= bitmask << 2; bitmask ^= bitmask << 4; @@ -67382,32 +100587,26 @@ simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { } } // unnamed namespace -} // namespace ppc64 +} // namespace lsx } // namespace simdjson #endif -/* end file simdjson/ppc64/bitmask.h */ -/* including simdjson/ppc64/numberparsing_defs.h: #include "simdjson/ppc64/numberparsing_defs.h" */ -/* begin file simdjson/ppc64/numberparsing_defs.h */ -#ifndef SIMDJSON_PPC64_NUMBERPARSING_DEFS_H -#define SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +/* end file simdjson/lsx/bitmask.h */ +/* including simdjson/lsx/numberparsing_defs.h: #include "simdjson/lsx/numberparsing_defs.h" */ +/* begin file simdjson/lsx/numberparsing_defs.h */ +#ifndef SIMDJSON_LSX_NUMBERPARSING_DEFS_H +#define SIMDJSON_LSX_NUMBERPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/intrinsics.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include -#if defined(__linux__) -#include -#elif defined(__FreeBSD__) -#include -#endif - namespace simdjson { -namespace ppc64 { +namespace lsx { namespace numberparsing { // we don't have appropriate instructions, so let us use a scalar function @@ -67416,533 +100615,403 @@ namespace numberparsing { static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { uint64_t val; std::memcpy(&val, chars, sizeof(uint64_t)); -#ifdef __BIG_ENDIAN__ -#if defined(__linux__) - val = bswap_64(val); -#elif defined(__FreeBSD__) - val = bswap64(val); -#endif -#endif val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); } -/** @private */ simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { internal::value128 answer; -#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; answer.low = uint64_t(r); answer.high = uint64_t(r >> 64); -#endif return answer; } } // namespace numberparsing -} // namespace ppc64 +} // namespace lsx } // namespace simdjson +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 +#else #define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif +#endif -#endif // SIMDJSON_PPC64_NUMBERPARSING_DEFS_H -/* end file simdjson/ppc64/numberparsing_defs.h */ -/* including simdjson/ppc64/simd.h: #include "simdjson/ppc64/simd.h" */ -/* begin file simdjson/ppc64/simd.h */ -#ifndef SIMDJSON_PPC64_SIMD_H -#define SIMDJSON_PPC64_SIMD_H +#endif // SIMDJSON_LSX_NUMBERPARSING_DEFS_H +/* end file simdjson/lsx/numberparsing_defs.h */ +/* including simdjson/lsx/simd.h: #include "simdjson/lsx/simd.h" */ +/* begin file simdjson/lsx/simd.h */ +#ifndef SIMDJSON_LSX_SIMD_H +#define SIMDJSON_LSX_SIMD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include - namespace simdjson { -namespace ppc64 { +namespace lsx { namespace { namespace simd { -using __m128i = __vector unsigned char; - -template struct base { - __m128i value; - - // Zero constructor - simdjson_inline base() : value{__m128i()} {} - - // Conversion from SIMD register - simdjson_inline base(const __m128i _value) : value(_value) {} + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m128i value; - // Conversion to SIMD register - simdjson_inline operator const __m128i &() const { - return this->value; - } - simdjson_inline operator __m128i &() { return this->value; } + // Zero constructor + simdjson_inline base() : value{__m128i()} {} - // Bit operations - simdjson_inline Child operator|(const Child other) const { - return vec_or(this->value, (__m128i)other); - } - simdjson_inline Child operator&(const Child other) const { - return vec_and(this->value, (__m128i)other); - } - simdjson_inline Child operator^(const Child other) const { - return vec_xor(this->value, (__m128i)other); - } - simdjson_inline Child bit_andnot(const Child other) const { - return vec_andc(this->value, (__m128i)other); - } - simdjson_inline Child &operator|=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast | other; - return *this_cast; - } - simdjson_inline Child &operator&=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast & other; - return *this_cast; - } - simdjson_inline Child &operator^=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast ^ other; - return *this_cast; - } -}; + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} -template > -struct base8 : base> { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + simdjson_inline operator const v16i8&() const { return (v16i8&)this->value; } + simdjson_inline operator v16i8&() { return (v16i8&)this->value; } - simdjson_inline base8() : base>() {} - simdjson_inline base8(const __m128i _value) : base>(_value) {} + // Bit operations + simdjson_inline Child operator|(const Child other) const { return __lsx_vor_v(*this, other); } + simdjson_inline Child operator&(const Child other) const { return __lsx_vand_v(*this, other); } + simdjson_inline Child operator^(const Child other) const { return __lsx_vxor_v(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return __lsx_vandn_v(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; - friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { - return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); - } + // Forward-declared so they can be used by splat and friends. + template + struct simd8; - static const int SIZE = sizeof(base>::value); + template> + struct base8: base> { + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} - template - simdjson_inline simd8 prev(simd8 prev_chunk) const { - __m128i chunk = this->value; -#ifdef __LITTLE_ENDIAN__ - chunk = (__m128i)vec_reve(this->value); - prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); -#endif - chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); -#ifdef __LITTLE_ENDIAN__ - chunk = (__m128i)vec_reve((__m128i)chunk); -#endif - return chunk; - } -}; + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return __lsx_vseq_b(lhs, rhs); } -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd8 : base8 { - static simdjson_inline simd8 splat(bool _value) { - return (__m128i)vec_splats((unsigned char)(-(!!_value))); - } + static const int SIZE = sizeof(base>::value); - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m128i _value) - : base8(_value) {} - // Splat constructor - simdjson_inline simd8(bool _value) - : base8(splat(_value)) {} + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return __lsx_vor_v(__lsx_vbsll_v(*this, N), __lsx_vbsrl_v(prev_chunk, 16 - N)); + } + }; - simdjson_inline int to_bitmask() const { - __vector unsigned long long result; - const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, - 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { + return __lsx_vreplgr2vr_b(uint8_t(-(!!_value))); + } - result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, - (__m128i)perm_mask)); -#ifdef __LITTLE_ENDIAN__ - return static_cast(result[1]); -#else - return static_cast(result[0]); -#endif - } - simdjson_inline bool any() const { - return !vec_all_eq(this->value, (__m128i)vec_splats(0)); - } - simdjson_inline simd8 operator~() const { - return this->value ^ (__m128i)splat(true); - } -}; + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} -template struct base8_numeric : base8 { - static simdjson_inline simd8 splat(T value) { - (void)value; - return (__m128i)vec_splats(value); - } - static simdjson_inline simd8 zero() { return splat(0); } - static simdjson_inline simd8 load(const T values[16]) { - return (__m128i)(vec_vsx_ld(0, reinterpret_cast(values))); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, - T v5, T v6, T v7, T v8, T v9, - T v10, T v11, T v12, T v13, - T v14, T v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, - v14, v15); - } + simdjson_inline int to_bitmask() const { return __lsx_vpickve2gr_w(__lsx_vmskltz_b(*this), 0); } + simdjson_inline bool any() const { return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; - simdjson_inline base8_numeric() : base8() {} - simdjson_inline base8_numeric(const __m128i _value) - : base8(_value) {} + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return __lsx_vreplgr2vr_b(_value); } + static simdjson_inline simd8 zero() { return __lsx_vldi(0); } + static simdjson_inline simd8 load(const T values[16]) { + return __lsx_vld(reinterpret_cast(values), 0); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } - // Store to array - simdjson_inline void store(T dst[16]) const { - vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); - } + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} - // Override to distinguish from bool version - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + // Store to array + simdjson_inline void store(T dst[16]) const { + return __lsx_vst(*this, reinterpret_cast<__m128i *>(dst), 0); + } - // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { - return (__m128i)((__m128i)this->value + (__m128i)other); - } - simdjson_inline simd8 operator-(const simd8 other) const { - return (__m128i)((__m128i)this->value - (__m128i)other); - } - simdjson_inline simd8 &operator+=(const simd8 other) { - *this = *this + other; - return *static_cast *>(this); - } - simdjson_inline simd8 &operator-=(const simd8 other) { - *this = *this - other; - return *static_cast *>(this); - } + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return __lsx_vadd_b(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return __lsx_vsub_b(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior - // for out of range values) - template - simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); - } + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted - // as a bitset). Passing a 0 value for mask would be equivalent to writing out - // every byte to output. Only the first 16 - count_ones(mask) bytes of the - // result are significant but 16 bytes get written. Design consideration: it - // seems like a function with the signature simd8 compress(uint32_t mask) - // would be sensible, but the AVX ISA makes this kind of approach difficult. - template - simdjson_inline void compress(uint16_t mask, L *output) const { - using internal::BitsSetTable256mul2; - using internal::pshufb_combine_table; - using internal::thintable_epi8; - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. -#ifdef __LITTLE_ENDIAN__ - __m128i shufmask = (__m128i)(__vector unsigned long long){ - thintable_epi8[mask1], thintable_epi8[mask2]}; -#else - __m128i shufmask = (__m128i)(__vector unsigned long long){ - thintable_epi8[mask2], thintable_epi8[mask1]}; - shufmask = (__m128i)vec_reve((__m128i)shufmask); -#endif - // we increment by 0x08 the second half of the mask - shufmask = ((__m128i)shufmask) + - ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return __lsx_vshuf_b(lookup_table, lookup_table, *this); + } - // this is the version "nearly pruned" - __m128i pruned = vec_perm(this->value, this->value, shufmask); - // we still need to put the two halves together. - // we compute the popcount of the first half: - int pop1 = BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - __m128i compactmask = - vec_vsx_ld(0, reinterpret_cast(pshufb_combine_table + pop1 * 8)); - __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); - vec_vsx_st(answer, 0, reinterpret_cast<__m128i *>(output)); - } + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by haswell + // lsx do it in 2 steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register. + __m128i shufmask = {int64_t(thintable_epi8[mask1]), int64_t(thintable_epi8[mask2]) + 0x0808080808080808}; + // this is the version "nearly pruned" + __m128i pruned = __lsx_vshuf_b(*this, *this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask + __m128i compactmask = __lsx_vldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop1 * 8); + __m128i answer = __lsx_vshuf_b(pruned, pruned, compactmask); + __lsx_vst(answer, reinterpret_cast(output), 0); + } - template - simdjson_inline simd8 - lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, - L replace5, L replace6, L replace7, L replace8, L replace9, - L replace10, L replace11, L replace12, L replace13, L replace14, - L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, replace4, replace5, replace6, - replace7, replace8, replace9, replace10, replace11, replace12, - replace13, replace14, replace15)); - } -}; + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; -// Signed bytes -template <> struct simd8 : base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) - : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const int8_t *values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, - int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) - : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10, v11, v12, v13, v14, - v15}) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 - repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, - int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[16]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8({ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } - // Order-sensitive comparisons - simdjson_inline simd8 - max_val(const simd8 other) const { - return (__m128i)vec_max((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } - simdjson_inline simd8 - min_val(const simd8 other) const { - return (__m128i)vec_min((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } - simdjson_inline simd8 - operator>(const simd8 other) const { - return (__m128i)vec_cmpgt((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } - simdjson_inline simd8 - operator<(const simd8 other) const { - return (__m128i)vec_cmplt((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } -}; + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return __lsx_vmax_b(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lsx_vmin_b(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return __lsx_vslt_b(other, *this); } + simdjson_inline simd8 operator<(const simd8 other) const { return __lsx_vslt_b(*this, other); } + }; -// Unsigned bytes -template <> struct simd8 : base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) - : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const uint8_t *values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) - : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15}) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 - repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, - uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, - uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, - uint8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(__m128i(v16u8{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + })) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } - // Saturated math - simdjson_inline simd8 - saturating_add(const simd8 other) const { - return (__m128i)vec_adds(this->value, (__m128i)other); - } - simdjson_inline simd8 - saturating_sub(const simd8 other) const { - return (__m128i)vec_subs(this->value, (__m128i)other); - } + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return __lsx_vsadd_bu(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return __lsx_vssub_bu(*this, other); } - // Order-specific operations - simdjson_inline simd8 - max_val(const simd8 other) const { - return (__m128i)vec_max(this->value, (__m128i)other); - } - simdjson_inline simd8 - min_val(const simd8 other) const { - return (__m128i)vec_min(this->value, (__m128i)other); - } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 - gt_bits(const simd8 other) const { - return this->saturating_sub(other); - } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 - lt_bits(const simd8 other) const { - return other.saturating_sub(*this); - } - simdjson_inline simd8 - operator<=(const simd8 other) const { - return other.max_val(*this) == other; - } - simdjson_inline simd8 - operator>=(const simd8 other) const { - return other.min_val(*this) == other; - } - simdjson_inline simd8 - operator>(const simd8 other) const { - return this->gt_bits(other).any_bits_set(); - } - simdjson_inline simd8 - operator<(const simd8 other) const { - return this->gt_bits(other).any_bits_set(); - } + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return __lsx_vmax_bu(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lsx_vmin_bu(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } - // Bit-specific operations - simdjson_inline simd8 bits_not_set() const { - return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); - } - simdjson_inline simd8 bits_not_set(simd8 bits) const { - return (*this & bits).bits_not_set(); - } - simdjson_inline simd8 any_bits_set() const { - return ~this->bits_not_set(); - } - simdjson_inline simd8 any_bits_set(simd8 bits) const { - return ~this->bits_not_set(bits); - } - simdjson_inline bool bits_not_set_anywhere() const { - return vec_all_eq(this->value, (__m128i)vec_splats(0)); - } - simdjson_inline bool any_bits_set_anywhere() const { - return !bits_not_set_anywhere(); - } - simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { - return vec_all_eq(vec_and(this->value, (__m128i)bits), - (__m128i)vec_splats(0)); - } - simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { - return !bits_not_set_anywhere(bits); - } - template simdjson_inline simd8 shr() const { - return simd8( - (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); - } - template simdjson_inline simd8 shl() const { - return simd8( - (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); - } -}; + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return 0 == __lsx_vpickve2gr_w(__lsx_vmskltz_b(*this), 0); } + simdjson_inline bool bits_not_set_anywhere() const { return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*this), 0); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(__lsx_vand_v(*this, bits)), 0); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(__lsx_vsrli_b(*this, N)); } + template + simdjson_inline simd8 shl() const { return simd8(__lsx_vslli_b(*this, N)); } + }; -template struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, - "PPC64 kernel should use four registers per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "LSX kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; - simd8x64(const simd8x64 &o) = delete; // no copy allowed - simd8x64 & - operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, - const simd8 chunk2, const simd8 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_inline simd8x64(const T ptr[64]) - : chunks{simd8::load(ptr), simd8::load(ptr + 16), - simd8::load(ptr + 32), simd8::load(ptr + 48)} {} + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} - simdjson_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr + sizeof(simd8) * 0); - this->chunks[1].store(ptr + sizeof(simd8) * 1); - this->chunks[2].store(ptr + sizeof(simd8) * 2); - this->chunks[3].store(ptr + sizeof(simd8) * 3); - } + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint16_t mask1 = uint16_t(mask); + uint16_t mask2 = uint16_t(mask >> 16); + uint16_t mask3 = uint16_t(mask >> 32); + uint16_t mask4 = uint16_t(mask >> 48); + __m128i zcnt = __lsx_vpcnt_h(__m128i(v2u64{~mask, 0})); + uint64_t zcnt1 = __lsx_vpickve2gr_hu(zcnt, 0); + uint64_t zcnt2 = __lsx_vpickve2gr_hu(zcnt, 1); + uint64_t zcnt3 = __lsx_vpickve2gr_hu(zcnt, 2); + uint64_t zcnt4 = __lsx_vpickve2gr_hu(zcnt, 3); + uint8_t *voutput = reinterpret_cast(output); + // There should be a critical value which processes in scaler is faster. + if (zcnt1) + this->chunks[0].compress(mask1, reinterpret_cast(voutput)); + voutput += zcnt1; + if (zcnt2) + this->chunks[1].compress(mask2, reinterpret_cast(voutput)); + voutput += zcnt2; + if (zcnt3) + this->chunks[2].compress(mask3, reinterpret_cast(voutput)); + voutput += zcnt3; + if (zcnt4) + this->chunks[3].compress(mask4, reinterpret_cast(voutput)); + voutput += zcnt4; + return reinterpret_cast(voutput) - reinterpret_cast(output); + } - simdjson_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); - } + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } - simdjson_inline uint64_t compress(uint64_t mask, T *output) const { - this->chunks[0].compress(uint16_t(mask), output); - this->chunks[1].compress(uint16_t(mask >> 16), - output + 16 - count_ones(mask & 0xFFFF)); - this->chunks[2].compress(uint16_t(mask >> 32), - output + 32 - count_ones(mask & 0xFFFFFFFF)); - this->chunks[3].compress(uint16_t(mask >> 48), - output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); - return 64 - count_ones(mask); - } + simdjson_inline uint64_t to_bitmask() const { + __m128i mask1 = __lsx_vmskltz_b(this->chunks[0]); + __m128i mask2 = __lsx_vmskltz_b(this->chunks[1]); + __m128i mask3 = __lsx_vmskltz_b(this->chunks[2]); + __m128i mask4 = __lsx_vmskltz_b(this->chunks[3]); + mask1 = __lsx_vilvl_h(mask2, mask1); + mask2 = __lsx_vilvl_h(mask4, mask3); + return __lsx_vpickve2gr_du(__lsx_vilvl_w(mask2, mask1), 0); + } - simdjson_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r1 = this->chunks[1].to_bitmask(); - uint64_t r2 = this->chunks[2].to_bitmask(); - uint64_t r3 = this->chunks[3].to_bitmask(); - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); - } + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } - simdjson_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, - this->chunks[2] == mask, this->chunks[3] == mask) - .to_bitmask(); - } + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } - simdjson_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64(this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1], - this->chunks[2] == other.chunks[2], - this->chunks[3] == other.chunks[3]) - .to_bitmask(); - } + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } - simdjson_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, - this->chunks[2] <= mask, this->chunks[3] <= mask) - .to_bitmask(); - } -}; // struct simd8x64 + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 } // namespace simd } // unnamed namespace -} // namespace ppc64 +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_PPC64_SIMD_INPUT_H -/* end file simdjson/ppc64/simd.h */ -/* including simdjson/ppc64/stringparsing_defs.h: #include "simdjson/ppc64/stringparsing_defs.h" */ -/* begin file simdjson/ppc64/stringparsing_defs.h */ -#ifndef SIMDJSON_PPC64_STRINGPARSING_DEFS_H -#define SIMDJSON_PPC64_STRINGPARSING_DEFS_H +#endif // SIMDJSON_LSX_SIMD_H +/* end file simdjson/lsx/simd.h */ +/* including simdjson/lsx/stringparsing_defs.h: #include "simdjson/lsx/stringparsing_defs.h" */ +/* begin file simdjson/lsx/stringparsing_defs.h */ +#ifndef SIMDJSON_LSX_STRINGPARSING_DEFS_H +#define SIMDJSON_LSX_STRINGPARSING_DEFS_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace { using namespace simd; @@ -67951,65 +101020,53 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote - copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - simdjson_inline bool has_quote_first() { - return ((bs_bits - 1) & quote_bits) != 0; - } + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } - simdjson_inline int quote_index() { - return trailing_zeroes(quote_bits); - } - simdjson_inline int backslash_index() { - return trailing_zeroes(bs_bits); - } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } uint32_t bs_bits; uint32_t quote_bits; }; // struct backslash_and_quote -simdjson_inline backslash_and_quote -backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { // this can read up to 31 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), - "backslash and quote finder must process fewer than " - "SIMDJSON_PADDING bytes"); + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); simd8 v0(src); simd8 v1(src + sizeof(v0)); v0.store(dst); v1.store(dst + sizeof(v0)); - // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on - // PPC; therefore, we smash them together into a 64-byte mask and get the - // bitmask from there. - uint64_t bs_and_quote = - simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on LSX; therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); return { - uint32_t(bs_and_quote), // bs_bits - uint32_t(bs_and_quote >> 32) // quote_bits + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits }; } } // unnamed namespace -} // namespace ppc64 +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_PPC64_STRINGPARSING_DEFS_H -/* end file simdjson/ppc64/stringparsing_defs.h */ +#endif // SIMDJSON_LSX_STRINGPARSING_DEFS_H +/* end file simdjson/lsx/stringparsing_defs.h */ #define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 -/* end file simdjson/ppc64/begin.h */ -/* including simdjson/generic/ondemand/amalgamated.h for ppc64: #include "simdjson/generic/ondemand/amalgamated.h" */ -/* begin file simdjson/generic/ondemand/amalgamated.h for ppc64 */ +/* end file simdjson/lsx/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for lsx: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for lsx */ #if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) #error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! #endif // Stuff other things depend on -/* including simdjson/generic/ondemand/base.h for ppc64: #include "simdjson/generic/ondemand/base.h" */ -/* begin file simdjson/generic/ondemand/base.h for ppc64 */ +/* including simdjson/generic/ondemand/base.h for lsx: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -68018,7 +101075,7 @@ backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { /** * A fast, simple, DOM-like interface that parses JSON as you use it. * @@ -68029,8 +101086,8 @@ namespace ondemand { /** Represents the depth of a JSON value (number of nested arrays/objects). */ using depth_t = int32_t; -/** @copydoc simdjson::ppc64::number_type */ -using number_type = simdjson::ppc64::number_type; +/** @copydoc simdjson::lsx::number_type */ +using number_type = simdjson::lsx::number_type; /** @private Position in the JSON buffer indexes */ using token_position = const uint32_t *; @@ -68053,13 +101110,139 @@ class value; class value_iterator; } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H -/* end file simdjson/generic/ondemand/base.h for ppc64 */ -/* including simdjson/generic/ondemand/value_iterator.h for ppc64: #include "simdjson/generic/ondemand/value_iterator.h" */ -/* begin file simdjson/generic/ondemand/value_iterator.h for ppc64 */ +/* end file simdjson/generic/ondemand/base.h for lsx */ +/* including simdjson/generic/ondemand/deserialize.h for lsx: #include "simdjson/generic/ondemand/deserialize.h" */ +/* begin file simdjson/generic/ondemand/deserialize.h for lsx */ +#if SIMDJSON_SUPPORTS_DESERIALIZATION + +#ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_ONDEMAND_DESERIALIZE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +namespace simdjson { + +namespace tag_invoke_fn_ns { +void tag_invoke(); + +struct tag_invoke_fn { + template + requires requires(Tag tag, Args &&...args) { + tag_invoke(std::forward(tag), std::forward(args)...); + } + constexpr auto operator()(Tag tag, Args &&...args) const + noexcept(noexcept(tag_invoke(std::forward(tag), + std::forward(args)...))) + -> decltype(tag_invoke(std::forward(tag), + std::forward(args)...)) { + return tag_invoke(std::forward(tag), std::forward(args)...); + } +}; +} // namespace tag_invoke_fn_ns + +inline namespace tag_invoke_ns { +inline constexpr tag_invoke_fn_ns::tag_invoke_fn tag_invoke = {}; +} // namespace tag_invoke_ns + +template +concept tag_invocable = requires(Tag tag, Args... args) { + tag_invoke(std::forward(tag), std::forward(args)...); +}; + +template +concept nothrow_tag_invocable = + tag_invocable && requires(Tag tag, Args... args) { + { + tag_invoke(std::forward(tag), std::forward(args)...) + } noexcept; + }; + +template +using tag_invoke_result = + std::invoke_result; + +template +using tag_invoke_result_t = + std::invoke_result_t; + +template using tag_t = std::decay_t; + + +struct deserialize_tag; + +/// These types are deserializable in a built-in way +template struct is_builtin_deserializable : std::false_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; + +template +concept is_builtin_deserializable_v = is_builtin_deserializable::value; + +template +concept custom_deserializable = tag_invocable; + +template +concept deserializable = custom_deserializable || is_builtin_deserializable_v; + +template +concept nothrow_custom_deserializable = nothrow_tag_invocable; + +// built-in types are noexcept and if an error happens, the value simply gets ignored and the error is returned. +template +concept nothrow_deserializable = nothrow_custom_deserializable || is_builtin_deserializable_v; + +/// Deserialize Tag +inline constexpr struct deserialize_tag { + using value_type = lsx::ondemand::value; + using document_type = lsx::ondemand::document; + using document_reference_type = lsx::ondemand::document_reference; + + // Customization Point for value + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for document + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for document reference + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + +} deserialize{}; + +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_DESERIALIZE_H +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + +/* end file simdjson/generic/ondemand/deserialize.h for lsx */ +/* including simdjson/generic/ondemand/value_iterator.h for lsx: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -68069,7 +101252,7 @@ class value_iterator; /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { /** @@ -68187,7 +101370,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -68197,7 +101381,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -68319,7 +101504,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -68329,7 +101515,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -68437,6 +101624,7 @@ class value_iterator { simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; simdjson_inline const uint8_t *peek_start() const noexcept; simdjson_inline uint32_t peek_start_length() const noexcept; + simdjson_inline uint32_t peek_root_length() const noexcept; /** * The general idea of the advance_... methods and the peek_* methods @@ -68531,18 +101719,19 @@ class value_iterator { friend class object; friend class array; friend class value; + friend class field; }; // value_iterator } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public ppc64::implementation_simdjson_result_base { +struct simdjson_result : public lsx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(ppc64::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lsx::ondemand::value_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; @@ -68550,9 +101739,9 @@ struct simdjson_result : public ppc64::implemen } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H -/* end file simdjson/generic/ondemand/value_iterator.h for ppc64 */ -/* including simdjson/generic/ondemand/value.h for ppc64: #include "simdjson/generic/ondemand/value.h" */ -/* begin file simdjson/generic/ondemand/value.h for ppc64 */ +/* end file simdjson/generic/ondemand/value_iterator.h for lsx */ +/* including simdjson/generic/ondemand/value.h for lsx: #include "simdjson/generic/ondemand/value.h" */ +/* begin file simdjson/generic/ondemand/value.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -68560,12 +101749,15 @@ struct simdjson_result : public ppc64::implemen /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include + namespace simdjson { -namespace ppc64 { -namespace ondemand { +namespace lsx { +namespace ondemand { /** * An ephemeral JSON value returned during iteration. It is only valid for as long as you do * not access more data in the JSON document. @@ -68590,15 +101782,21 @@ class value { * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ - template simdjson_inline simdjson_result get() noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); + template + simdjson_inline simdjson_result get() +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; } + /** * Get this value as the given type. * @@ -68608,7 +101806,38 @@ class value { * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ - template simdjson_inline error_code get(T &out) noexcept; + template + simdjson_inline error_code get(T &out) +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + #if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif + } /** * Cast this JSON value to an array. @@ -68684,6 +101913,17 @@ class value { * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * + * In some instances, you may want to allow replacement of invalid Unicode sequences. + * You may do so by passing the allow_replacement parameter as true. In the following + * example, the string "431924697b\udff0L\u0001Y" is not valid Unicode. By passing true + * to get_string, we allow the replacement of the invalid Unicode sequences with the Unicode + * replacement character (U+FFFD). + * + * simdjson::ondemand::parser parser; + * auto json = R"({"deviceId":"431924697b\udff0L\u0001Y"})"_padded; + * simdjson::ondemand::document doc = parser.iterate(json); + * auto view = doc["deviceId"].get_string(true); + * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. @@ -68749,6 +101989,17 @@ class value { simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.). + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); /** * Cast this JSON value to an array. * @@ -68912,7 +102163,7 @@ class value { * that only one field is returned. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field as not there when they are not in order). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. @@ -68924,6 +102175,7 @@ class value { simdjson_inline simdjson_result operator[](std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_result operator[](int) noexcept = delete; /** * Get the type of this JSON value. It does not validate or consume the value. @@ -68948,6 +102200,13 @@ class value { * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the value is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; /** * Checks whether the value is a negative number. @@ -68980,10 +102239,12 @@ class value { * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have - * an integer greater or equal to 9223372036854775808 + * an integer greater or equal to 9223372036854775808. * get_number_type() is number_type::signed_integer if we have an - * integer that is less than 9223372036854775808 - * Otherwise, get_number_type() has value number_type::floating_point_number + * integer that is less than 9223372036854775808. + * get_number_type() is number_type::big_integer for integers that do not fit in 64 bits, + * in which case the digit_count is set to the length of the big integer string. + * Otherwise, get_number_type() has value number_type::floating_point_number. * * This function requires processing the number string, but it is expected * to be faster than get_number().get_number_type() because it is does not @@ -69010,6 +102271,8 @@ class value { * You can recover the value by calling number.get_uint64() and you * have that number.is_uint64() is true. * + * For integers that do not fit in 64 bits, the function returns BIGINT_ERROR error code. + * * Otherwise, number.get_number_type() has value number_type::floating_point_number * and we have a binary64 number. * You can recover the value by calling number.get_double() and you @@ -69025,7 +102288,6 @@ class value { */ simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; - /** * Get the raw JSON for this token. * @@ -69123,6 +102385,20 @@ class value { */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + + protected: /** * Create a value. @@ -69162,23 +102438,24 @@ class value { friend class object; friend struct simdjson_result; friend struct simdjson_result; + friend class field; }; } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public ppc64::implementation_simdjson_result_base { +struct simdjson_result : public lsx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(ppc64::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lsx::ondemand::value &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; - simdjson_inline simdjson_result get_array() noexcept; - simdjson_inline simdjson_result get_object() noexcept; + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; @@ -69190,7 +102467,7 @@ struct simdjson_result : public ppc64::implementation_si template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; - simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result is_null() noexcept; @@ -69199,20 +102476,22 @@ struct simdjson_result : public ppc64::implementation_si template simdjson_inline error_code get(T &out) noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_inline operator ppc64::ondemand::array() noexcept(false); - simdjson_inline operator ppc64::ondemand::object() noexcept(false); + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator lsx::ondemand::array() noexcept(false); + simdjson_inline operator lsx::ondemand::object() noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); - simdjson_inline operator ppc64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator lsx::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; - simdjson_inline simdjson_result at(size_t index) noexcept; - simdjson_inline simdjson_result begin() & noexcept; - simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). @@ -69234,9 +102513,9 @@ struct simdjson_result : public ppc64::implementation_si * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ - simdjson_inline simdjson_result find_field(std::string_view key) noexcept; - /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ - simdjson_inline simdjson_result find_field(const char *key) noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. @@ -69252,18 +102531,19 @@ struct simdjson_result : public ppc64::implementation_si * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field as not there when they are not in order). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ - simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; - /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; - /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_inline simdjson_result operator[](std::string_view key) noexcept; - /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_result operator[](int) noexcept = delete; /** * Get the type of this JSON value. @@ -69272,12 +102552,13 @@ struct simdjson_result : public ppc64::implementation_si * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). */ - simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; - simdjson_inline simdjson_result get_number_type() noexcept; - simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; @@ -69287,15 +102568,16 @@ struct simdjson_result : public ppc64::implementation_si simdjson_inline simdjson_result current_location() noexcept; /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ simdjson_inline simdjson_result current_depth() const noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H -/* end file simdjson/generic/ondemand/value.h for ppc64 */ -/* including simdjson/generic/ondemand/logger.h for ppc64: #include "simdjson/generic/ondemand/logger.h" */ -/* begin file simdjson/generic/ondemand/logger.h for ppc64 */ +/* end file simdjson/generic/ondemand/value.h for lsx */ +/* including simdjson/generic/ondemand/logger.h for lsx: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -69304,7 +102586,7 @@ struct simdjson_result : public ppc64::implementation_si /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { // Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical @@ -69350,13 +102632,13 @@ static inline void log_error(const value_iterator &iter, const char *error, cons } // namespace logger } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H -/* end file simdjson/generic/ondemand/logger.h for ppc64 */ -/* including simdjson/generic/ondemand/token_iterator.h for ppc64: #include "simdjson/generic/ondemand/token_iterator.h" */ -/* begin file simdjson/generic/ondemand/token_iterator.h for ppc64 */ +/* end file simdjson/generic/ondemand/logger.h for lsx */ +/* including simdjson/generic/ondemand/token_iterator.h for lsx: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -69367,7 +102649,7 @@ static inline void log_error(const value_iterator &iter, const char *error, cons /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { /** @@ -69406,7 +102688,7 @@ class token_iterator { * 1 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** @@ -69436,7 +102718,14 @@ class token_iterator { * @param position The position of the token. */ simdjson_inline uint32_t peek_length(token_position position) const noexcept; - + /** + * Get the maximum length of the JSON text for a root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token (start of the document). + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; /** * Return the current index. */ @@ -69491,15 +102780,15 @@ class token_iterator { }; } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public ppc64::implementation_simdjson_result_base { +struct simdjson_result : public lsx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(ppc64::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lsx::ondemand::token_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private @@ -69508,9 +102797,9 @@ struct simdjson_result : public ppc64::implemen } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H -/* end file simdjson/generic/ondemand/token_iterator.h for ppc64 */ -/* including simdjson/generic/ondemand/json_iterator.h for ppc64: #include "simdjson/generic/ondemand/json_iterator.h" */ -/* begin file simdjson/generic/ondemand/json_iterator.h for ppc64 */ +/* end file simdjson/generic/ondemand/token_iterator.h for lsx */ +/* including simdjson/generic/ondemand/json_iterator.h for lsx: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -69521,7 +102810,7 @@ struct simdjson_result : public ppc64::implemen /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { /** @@ -69659,7 +102948,7 @@ class json_iterator { * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used ... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** @@ -69687,7 +102976,7 @@ class json_iterator { * @param position The position of the token to retrieve. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used ... */ simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** @@ -69698,13 +102987,21 @@ class json_iterator { * @param position The position of the token to retrieve. */ simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; /** * Get the JSON text for the last token in the document. * * This is not null-terminated; it is a view into the JSON. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used ... */ simdjson_inline const uint8_t *peek_last() const noexcept; @@ -69798,6 +103095,9 @@ class json_iterator { inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. @@ -69813,6 +103113,7 @@ class json_iterator { friend class raw_json_string; friend class parser; friend class value_iterator; + friend class field; template friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; template @@ -69820,15 +103121,15 @@ class json_iterator { }; // json_iterator } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public ppc64::implementation_simdjson_result_base { +struct simdjson_result : public lsx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(ppc64::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lsx::ondemand::json_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -69837,9 +103138,9 @@ struct simdjson_result : public ppc64::implement } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H -/* end file simdjson/generic/ondemand/json_iterator.h for ppc64 */ -/* including simdjson/generic/ondemand/json_type.h for ppc64: #include "simdjson/generic/ondemand/json_type.h" */ -/* begin file simdjson/generic/ondemand/json_type.h for ppc64 */ +/* end file simdjson/generic/ondemand/json_iterator.h for lsx */ +/* including simdjson/generic/ondemand/json_type.h for lsx: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -69850,7 +103151,7 @@ struct simdjson_result : public ppc64::implement /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { /** @@ -69983,15 +103284,15 @@ inline std::ostream& operator<<(std::ostream& out, simdjson_result &t #endif } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public ppc64::implementation_simdjson_result_base { +struct simdjson_result : public lsx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(ppc64::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lsx::ondemand::json_type &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private @@ -70000,9 +103301,9 @@ struct simdjson_result : public ppc64::implementatio } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H -/* end file simdjson/generic/ondemand/json_type.h for ppc64 */ -/* including simdjson/generic/ondemand/raw_json_string.h for ppc64: #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* begin file simdjson/generic/ondemand/raw_json_string.h for ppc64 */ +/* end file simdjson/generic/ondemand/json_type.h for lsx */ +/* including simdjson/generic/ondemand/raw_json_string.h for lsx: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -70012,7 +103313,7 @@ struct simdjson_result : public ppc64::implementatio /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { /** @@ -70188,30 +103489,30 @@ simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_js } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public ppc64::implementation_simdjson_result_base { +struct simdjson_result : public lsx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(ppc64::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lsx::ondemand::raw_json_string &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; - simdjson_inline simdjson_warn_unused simdjson_result unescape(ppc64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; - simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(ppc64::ondemand::json_iterator &iter) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(lsx::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(lsx::ondemand::json_iterator &iter) const noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H -/* end file simdjson/generic/ondemand/raw_json_string.h for ppc64 */ -/* including simdjson/generic/ondemand/parser.h for ppc64: #include "simdjson/generic/ondemand/parser.h" */ -/* begin file simdjson/generic/ondemand/parser.h for ppc64 */ +/* end file simdjson/generic/ondemand/raw_json_string.h for lsx */ +/* including simdjson/generic/ondemand/parser.h for lsx: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -70223,12 +103524,12 @@ struct simdjson_result : public ppc64::impleme #include namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -70298,6 +103599,22 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * + * ### std::string references + * + * If you pass a mutable std::string reference (std::string&), the parser will seek to extend + * its capacity to SIMDJSON_PADDING bytes beyond the end of the string. + * + * Whenever you pass an std::string reference, the parser will access the bytes beyond the end of + * the string but before the end of the allocated memory (std::string::capacity()). + * If you are using a sanitizer that checks for reading uninitialized bytes or std::string's + * container-overflow checks, you may encounter sanitizer warnings. + * You can safely ignore these warnings. Or you can call simdjson::pad(std::string&) to pad the + * string with SIMDJSON_PADDING spaces: this function returns a simdjson::padding_string_view + * which can be be passed to the parser's iterate function: + * + * std::string json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"; + * document doc = parser.iterate(simdjson::pad(json)); + * * @param json The JSON to parse. * @param len The length of the JSON. * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). @@ -70312,6 +103629,9 @@ class parser { * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ @@ -70456,9 +103776,9 @@ class parser { simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). */ - simdjson_inline size_t capacity() const noexcept; + simdjson_pure simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ - simdjson_inline size_t max_capacity() const noexcept; + simdjson_pure simdjson_inline size_t max_capacity() const noexcept; simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). @@ -70466,7 +103786,7 @@ class parser { * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. */ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -70489,8 +103809,12 @@ class parser { * behavior of the parser for future operations. */ bool threaded{true}; + #else + /** + * When SIMDJSON_THREADS_ENABLED is not defined, the parser instance cannot use threads. + */ + bool threaded{false}; #endif - /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. * The result must be valid UTF-8. @@ -70538,9 +103862,20 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * the string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and usafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ - std::unique_ptr implementation{}; + std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; @@ -70554,15 +103889,15 @@ class parser { }; } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public ppc64::implementation_simdjson_result_base { +struct simdjson_result : public lsx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(ppc64::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lsx::ondemand::parser &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; @@ -70570,11 +103905,11 @@ struct simdjson_result : public ppc64::implementation_s } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H -/* end file simdjson/generic/ondemand/parser.h for ppc64 */ +/* end file simdjson/generic/ondemand/parser.h for lsx */ // All other declarations -/* including simdjson/generic/ondemand/array.h for ppc64: #include "simdjson/generic/ondemand/array.h" */ -/* begin file simdjson/generic/ondemand/array.h for ppc64 */ +/* including simdjson/generic/ondemand/array.h for lsx: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -70585,7 +103920,7 @@ struct simdjson_result : public ppc64::implementation_s /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { /** @@ -70621,7 +103956,8 @@ class array { * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer - * safe to continue. + * safe to continue. Note that count_elements() does not validate the JSON values, + * only the structure of the array. * * To check that an array is empty, it is more performant to use * the is_empty() method. @@ -70677,6 +104013,22 @@ class array { * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. @@ -70748,25 +104100,26 @@ class array { }; } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public ppc64::implementation_simdjson_result_base { +struct simdjson_result : public lsx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(ppc64::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lsx::ondemand::array &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; - simdjson_inline simdjson_result begin() noexcept; - simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; inline simdjson_result count_elements() & noexcept; inline simdjson_result is_empty() & noexcept; inline simdjson_result reset() & noexcept; - simdjson_inline simdjson_result at(size_t index) noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; }; @@ -70774,9 +104127,9 @@ struct simdjson_result : public ppc64::implementation_si } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H -/* end file simdjson/generic/ondemand/array.h for ppc64 */ -/* including simdjson/generic/ondemand/array_iterator.h for ppc64: #include "simdjson/generic/ondemand/array_iterator.h" */ -/* begin file simdjson/generic/ondemand/array_iterator.h for ppc64 */ +/* end file simdjson/generic/ondemand/array.h for lsx */ +/* including simdjson/generic/ondemand/array_iterator.h for lsx: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -70788,7 +104141,7 @@ struct simdjson_result : public ppc64::implementation_si namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { /** @@ -70848,15 +104201,15 @@ class array_iterator { }; } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public ppc64::implementation_simdjson_result_base { +struct simdjson_result : public lsx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(ppc64::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lsx::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -70864,28 +104217,31 @@ struct simdjson_result : public ppc64::implemen // Iterator interface // - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. - simdjson_inline bool operator==(const simdjson_result &) const noexcept; - simdjson_inline bool operator!=(const simdjson_result &) const noexcept; - simdjson_inline simdjson_result &operator++() noexcept; + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H -/* end file simdjson/generic/ondemand/array_iterator.h for ppc64 */ -/* including simdjson/generic/ondemand/document.h for ppc64: #include "simdjson/generic/ondemand/document.h" */ -/* begin file simdjson/generic/ondemand/document.h for ppc64 */ +/* end file simdjson/generic/ondemand/array_iterator.h for lsx */ +/* including simdjson/generic/ondemand/document.h for lsx: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { /** @@ -71056,22 +104412,39 @@ class document { * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ - template simdjson_inline simdjson_result get() & noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); + template + simdjson_inline simdjson_result get() & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "Cannot initialize the specified type."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; } - /** @overload template simdjson_result get() & noexcept */ - template simdjson_inline simdjson_result get() && noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); + /** + * @overload template simdjson_result get() & noexcept + * + * We disallow the use tag_invoke CPO on a moved document; it may create UB + * if user uses `ondemand::array` or `ondemand::object` in their custom type. + * + * The member function is still remains specialize-able for compatibility + * reasons, but we completely disallow its use when a tag_invoke customization + * is provided. + */ + template + simdjson_inline simdjson_result get() && +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(!std::is_same::value && !std::is_same::value, "You should never hold either an ondemand::array or ondemand::object without a corresponding ondemand::document being alive; that would be Undefined Behaviour."); + return static_cast(*this).get(); } /** @@ -71085,11 +104458,57 @@ class document { * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ - template simdjson_inline error_code get(T &out) & noexcept; + template + simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { +#if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + } + /** @overload template error_code get(T &out) & noexcept */ - template simdjson_inline error_code get(T &out) && noexcept; + template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.) + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() & noexcept(false); + template + explicit simdjson_deprecated simdjson_inline operator T() && noexcept(false); + /** * Cast this JSON value to an array. * @@ -71173,7 +104592,8 @@ class document { * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer - * safe to continue. + * safe to continue. Note that count_elements() does not validate the JSON values, + * only the structure of the array. */ simdjson_inline simdjson_result count_elements() & noexcept; /** @@ -71262,7 +104682,7 @@ class document { * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field was not there when they are not in order). * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. E.g., the array @@ -71285,6 +104705,7 @@ class document { simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; /** * Get the type of this JSON value. It does not validate or consume the value. @@ -71308,6 +104729,14 @@ class document { */ simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the document is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + /** * Checks whether the document is a negative number. * @@ -71335,9 +104764,11 @@ class document { * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have - * an integer greater or equal to 9223372036854775808 + * an integer greater or equal to 9223372036854775808 and no larger than 18446744073709551615. * get_number_type() is number_type::signed_integer if we have an - * integer that is less than 9223372036854775808 + * integer that is less than 9223372036854775808 and greater or equal to -9223372036854775808. + * get_number_type() is number_type::big_integer if we have an integer outside + * of those ranges (either larger than 18446744073709551615 or smaller than -9223372036854775808). * Otherwise, get_number_type() has value number_type::floating_point_number * * This function requires processing the number string, but it is expected @@ -71455,6 +104886,14 @@ class document { * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/\\u00E9") == 123 + * doc.at_pointer((const char*)u8"/\u00E9") returns an error (NO_SUCH_FIELD) + * * Note that at_pointer() automatically calls rewind between each call. Thus * all values, objects and arrays that you have created so far (including unescaped strings) * are invalidated. After calling at_pointer, you need to consume the result: string values @@ -71471,6 +104910,30 @@ class document { * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_path(".\\u00E9") == 123 + * doc.at_path((const char*)u8".\u00E9") returns an error (NO_SUCH_FIELD) + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing @@ -71511,6 +104974,11 @@ class document { /** * A document_reference is a thin wrapper around a document reference instance. + * The document_reference instances are used primarily/solely for streams of JSON + * documents. They differ from document instances when parsing a scalar value + * (a document that is not an array or an object). In the case of a document, + * we expect the document to be fully consumed. In the case of a document_reference, + * we allow trailing content. */ class document_reference { public: @@ -71536,10 +105004,82 @@ class document_reference { simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; + template + simdjson_inline simdjson_result get() & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "Cannot initialize the specified type."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + template + simdjson_inline simdjson_result get() && +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(!std::is_same::value && !std::is_same::value, "You should never hold either an ondemand::array or ondemand::object without a corresponding ondemand::document_reference being alive; that would be Undefined Behaviour."); + return static_cast(*this).get(); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { +#if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + } + + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; simdjson_inline operator document&() const noexcept; - #if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator array() & noexcept(false); simdjson_inline operator object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); @@ -71559,11 +105099,13 @@ class document_reference { simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; @@ -71573,25 +105115,27 @@ class document_reference { simdjson_inline simdjson_result get_number() noexcept; simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + private: document *doc{nullptr}; }; } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public ppc64::implementation_simdjson_result_base { +struct simdjson_result : public lsx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(ppc64::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lsx::ondemand::document &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; - simdjson_inline simdjson_result get_array() & noexcept; - simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; @@ -71602,52 +105146,56 @@ struct simdjson_result : public ppc64::implementation template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; - simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; - simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & noexcept; - template simdjson_inline simdjson_result get() && noexcept; + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; - #if SIMDJSON_EXCEPTIONS - simdjson_inline operator ppc64::ondemand::array() & noexcept(false); - simdjson_inline operator ppc64::ondemand::object() & noexcept(false); + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator lsx::ondemand::array() & noexcept(false); + simdjson_inline operator lsx::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); - simdjson_inline operator ppc64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator lsx::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); - simdjson_inline operator ppc64::ondemand::value() noexcept(false); + simdjson_inline operator lsx::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; - simdjson_inline simdjson_result at(size_t index) & noexcept; - simdjson_inline simdjson_result begin() & noexcept; - simdjson_inline simdjson_result end() & noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field(const char *key) & noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_inline simdjson_result operator[](const char *key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; - simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; simdjson_inline bool at_end() const noexcept; simdjson_inline bool is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; - simdjson_inline simdjson_result get_number_type() noexcept; - simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; }; @@ -71658,14 +105206,14 @@ struct simdjson_result : public ppc64::implementation namespace simdjson { template<> -struct simdjson_result : public ppc64::implementation_simdjson_result_base { +struct simdjson_result : public lsx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(ppc64::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result(lsx::ondemand::document_reference value, error_code error) noexcept; simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; - simdjson_inline simdjson_result get_array() & noexcept; - simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; @@ -71676,54 +105224,64 @@ struct simdjson_result : public ppc64::impl template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; - simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; - simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_inline operator ppc64::ondemand::array() & noexcept(false); - simdjson_inline operator ppc64::ondemand::object() & noexcept(false); + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator lsx::ondemand::array() & noexcept(false); + simdjson_inline operator lsx::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); - simdjson_inline operator ppc64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator lsx::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); - simdjson_inline operator ppc64::ondemand::value() noexcept(false); + simdjson_inline operator lsx::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; - simdjson_inline simdjson_result at(size_t index) & noexcept; - simdjson_inline simdjson_result begin() & noexcept; - simdjson_inline simdjson_result end() & noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field(const char *key) & noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_inline simdjson_result operator[](const char *key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; - simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; - simdjson_inline simdjson_result get_number_type() noexcept; - simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H -/* end file simdjson/generic/ondemand/document.h for ppc64 */ -/* including simdjson/generic/ondemand/document_stream.h for ppc64: #include "simdjson/generic/ondemand/document_stream.h" */ -/* begin file simdjson/generic/ondemand/document_stream.h for ppc64 */ +/* end file simdjson/generic/ondemand/document.h for lsx */ +/* including simdjson/generic/ondemand/document_stream.h for lsx: #include "simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -71741,7 +105299,7 @@ struct simdjson_result : public ppc64::impl #endif namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED @@ -71848,10 +105406,9 @@ class document_stream { class iterator { public: using value_type = simdjson_result; - using reference = value_type; - + using reference = simdjson_result; + using pointer = void; using difference_type = std::ptrdiff_t; - using iterator_category = std::input_iterator_tag; /** @@ -71861,7 +105418,7 @@ class document_stream { /** * Get the current document (or error). */ - simdjson_inline simdjson_result operator*() noexcept; + simdjson_inline reference operator*() noexcept; /** * Advance to the next document (prefix). */ @@ -72043,18 +105600,18 @@ class document_stream { friend class document; friend class json_iterator; friend struct simdjson_result; - friend struct internal::simdjson_result_base; + friend struct simdjson::internal::simdjson_result_base; }; // document_stream } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public ppc64::implementation_simdjson_result_base { +struct simdjson_result : public lsx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(ppc64::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lsx::ondemand::document_stream &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; @@ -72062,9 +105619,9 @@ struct simdjson_result : public ppc64::impleme } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H -/* end file simdjson/generic/ondemand/document_stream.h for ppc64 */ -/* including simdjson/generic/ondemand/field.h for ppc64: #include "simdjson/generic/ondemand/field.h" */ -/* begin file simdjson/generic/ondemand/field.h for ppc64 */ +/* end file simdjson/generic/ondemand/document_stream.h for lsx */ +/* including simdjson/generic/ondemand/field.h for lsx: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -72076,7 +105633,7 @@ struct simdjson_result : public ppc64::impleme /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { /** @@ -72093,22 +105650,49 @@ class field : public std::pair { * * Exists so you can declare a variable and later assign to it before use. */ - simdjson_inline field() noexcept; - + simdjson_inline field() noexcept; + + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; /** * Get the key as a string_view (for higher speed, consider raw_key). * We deliberately use a more cumbersome name (unescaped_key) to force users - * to think twice about using it. + * to think twice about using it. The content is stored in the receiver. * * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; + /** + * Get the unprocessed key as a string_view. This includes the quotes and may include + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). + */ + simdjson_inline std::string_view key_raw_json_token() const noexcept; + /** + * Get the key as a string_view. This does not include the quotes and + * the string is unprocessed key so it may contain escape characters + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. + */ + simdjson_inline std::string_view escaped_key() const noexcept; /** * Get the field value. */ @@ -72127,29 +105711,33 @@ class field : public std::pair { }; } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public ppc64::implementation_simdjson_result_base { +struct simdjson_result : public lsx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(ppc64::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lsx::ondemand::field &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; - simdjson_inline simdjson_result key() noexcept; - simdjson_inline simdjson_result value() noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result key_raw_json_token() noexcept; + simdjson_inline simdjson_result escaped_key() noexcept; + simdjson_inline simdjson_result value() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H -/* end file simdjson/generic/ondemand/field.h for ppc64 */ -/* including simdjson/generic/ondemand/object.h for ppc64: #include "simdjson/generic/ondemand/object.h" */ -/* begin file simdjson/generic/ondemand/object.h for ppc64 */ +/* end file simdjson/generic/ondemand/field.h for lsx */ +/* including simdjson/generic/ondemand/object.h for lsx: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -72160,7 +105748,7 @@ struct simdjson_result : public ppc64::implementation_si /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { /** @@ -72208,6 +105796,8 @@ class object { * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() * is an error. * + * If you expect to have keys with escape characters, please review our documentation. + * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ @@ -72229,7 +105819,7 @@ class object { * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field was not there when they are not in order). * * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. @@ -72245,6 +105835,8 @@ class object { * You are expected to access keys only once. You should access the value corresponding to a key * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. * + * If you expect to have keys with escape characters, please review our documentation. + * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ @@ -72292,11 +105884,24 @@ class object { */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -72358,27 +105963,29 @@ class object { }; } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public ppc64::implementation_simdjson_result_base { +struct simdjson_result : public lsx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(ppc64::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lsx::ondemand::object &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; - simdjson_inline simdjson_result begin() noexcept; - simdjson_inline simdjson_result end() noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; @@ -72389,9 +105996,9 @@ struct simdjson_result : public ppc64::implementation_s } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H -/* end file simdjson/generic/ondemand/object.h for ppc64 */ -/* including simdjson/generic/ondemand/object_iterator.h for ppc64: #include "simdjson/generic/ondemand/object_iterator.h" */ -/* begin file simdjson/generic/ondemand/object_iterator.h for ppc64 */ +/* end file simdjson/generic/ondemand/object.h for lsx */ +/* including simdjson/generic/ondemand/object_iterator.h for lsx: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -72402,7 +106009,7 @@ struct simdjson_result : public ppc64::implementation_s /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { class object_iterator { @@ -72443,15 +106050,15 @@ class object_iterator { }; } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public ppc64::implementation_simdjson_result_base { +struct simdjson_result : public lsx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(ppc64::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lsx::ondemand::object_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -72460,21 +106067,21 @@ struct simdjson_result : public ppc64::impleme // // Reads key and value, yielding them to the user. - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. // Assumes it's being compared with the end. true if depth < iter->depth. - simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator==(const simdjson_result &) const noexcept; // Assumes it's being compared with the end. true if depth >= iter->depth. - simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; // Checks for ']' and ',' - simdjson_inline simdjson_result &operator++() noexcept; + simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H -/* end file simdjson/generic/ondemand/object_iterator.h for ppc64 */ -/* including simdjson/generic/ondemand/serialization.h for ppc64: #include "simdjson/generic/ondemand/serialization.h" */ -/* begin file simdjson/generic/ondemand/serialization.h for ppc64 */ +/* end file simdjson/generic/ondemand/object_iterator.h for lsx */ +/* including simdjson/generic/ondemand/serialization.h for lsx: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -72488,30 +106095,30 @@ namespace simdjson { * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ -inline simdjson_result to_json_string(ppc64::ondemand::document& x) noexcept; +inline simdjson_result to_json_string(lsx::ondemand::document& x) noexcept; /** * Create a string-view instance out of a value instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. The value must * not have been accessed previously. It does not * validate the content. */ -inline simdjson_result to_json_string(ppc64::ondemand::value& x) noexcept; +inline simdjson_result to_json_string(lsx::ondemand::value& x) noexcept; /** * Create a string-view instance out of an object instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ -inline simdjson_result to_json_string(ppc64::ondemand::object& x) noexcept; +inline simdjson_result to_json_string(lsx::ondemand::object& x) noexcept; /** * Create a string-view instance out of an array instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ -inline simdjson_result to_json_string(ppc64::ondemand::array& x) noexcept; -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(lsx::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); } // namespace simdjson /** @@ -72521,7 +106128,7 @@ inline simdjson_result to_json_string(simdjson_result x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. It does not @@ -72543,9 +106150,9 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. It does not @@ -72555,13 +106162,13 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); #endif -inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document_reference& value); +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::document_reference& value); #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); #endif /** * Print JSON to an output stream. It does not @@ -72571,22 +106178,237 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif -}}} // namespace simdjson::ppc64::ondemand +}}} // namespace simdjson::lsx::ondemand #endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H -/* end file simdjson/generic/ondemand/serialization.h for ppc64 */ +/* end file simdjson/generic/ondemand/serialization.h for lsx */ + +// Deserialization for standard types +/* including simdjson/generic/ondemand/std_deserialize.h for lsx: #include "simdjson/generic/ondemand/std_deserialize.h" */ +/* begin file simdjson/generic/ondemand/std_deserialize.h for lsx */ +#if SIMDJSON_SUPPORTS_DESERIALIZATION + +#ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_ONDEMAND_DESERIALIZE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +template +constexpr bool require_custom_serialization = false; + +////////////////////////////// +// Number deserialization +////////////////////////////// + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + using limits = std::numeric_limits; + + uint64_t x; + SIMDJSON_TRY(val.get_uint64().get(x)); + if (x > (limits::max)()) { + return NUMBER_OUT_OF_RANGE; + } + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + double x; + SIMDJSON_TRY(val.get_double().get(x)); + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + using limits = std::numeric_limits; + + int64_t x; + SIMDJSON_TRY(val.get_int64().get(x)); + if (x > (limits::max)() || x < (limits::min)()) { + return NUMBER_OUT_OF_RANGE; + } + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + +/** + * STL containers have several constructors including one that takes a single + * size argument. Thus, some compilers (Visual Studio) will not be able to + * disambiguate between the size and container constructor. Users should + * explicitly specify the type of the container as needed: e.g., + * doc.get>(). + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::value_type; + static_assert( + deserializable, + "The specified type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the container must default constructible."); + + lsx::ondemand::array arr; + SIMDJSON_TRY(val.get_array().get(arr)); + for (auto v : arr) { + if constexpr (concepts::returns_reference) { + if (auto const err = v.get().get(concepts::emplace_one(out)); + err) { + // If an error occurs, the empty element that we just inserted gets + // removed. We're not using a temp variable because if T is a heavy + // type, we want the valid path to be the fast path and the slow path be + // the path that has errors in it. + if constexpr (requires { out.pop_back(); }) { + static_cast(out.pop_back()); + } + return err; + } + } else { + value_type temp; + if (auto const err = v.get().get(temp); err) { + return err; + } + concepts::emplace_one(out, std::move(temp)); + } + } + return SUCCESS; +} + + +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + lsx::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + + + + +/** + * This CPO (Customization Point Object) will help deserialize into + * smart pointers. + * + * If constructing T is nothrow, this conversion should be nothrow as well since + * we return MEMALLOC if we're not able to allocate memory instead of throwing + * the error message. + * + * @tparam T The type inside the smart pointer + * @tparam ValT document/value type + * @param val document/value + * @param out a reference to the smart pointer + * @return status of the conversion + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::element_type, ValT>) { + using element_type = typename std::remove_cvref_t::element_type; + + // For better error messages, don't use these as constraints on + // the tag_invoke CPO. + static_assert( + deserializable, + "The specified type inside the unique_ptr must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the unique_ptr must default constructible."); + + auto ptr = new (std::nothrow) element_type(); + if (ptr == nullptr) { + return MEMALLOC; + } + SIMDJSON_TRY(val.template get(*ptr)); + out.reset(ptr); + return SUCCESS; +} + +/** + * This CPO (Customization Point Object) will help deserialize into optional types. + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { + using value_type = typename std::remove_cvref_t::value_type; + + static_assert( + deserializable, + "The specified type inside the unique_ptr must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the unique_ptr must default constructible."); + + if (!out) { + out.emplace(); + } + SIMDJSON_TRY(val.template get(out.value())); + return SUCCESS; +} + +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_DESERIALIZE_H +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +/* end file simdjson/generic/ondemand/std_deserialize.h for lsx */ // Inline definitions -/* including simdjson/generic/ondemand/array-inl.h for ppc64: #include "simdjson/generic/ondemand/array-inl.h" */ -/* begin file simdjson/generic/ondemand/array-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/array-inl.h for lsx: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/jsonpathutil.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ @@ -72596,7 +106418,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result array::start(value_iterator &iter) noexcept { - // We don't need to know if the array is empty to start iteration, but we do want to know if there - // is an error--thus `simdjson_unused`. - simdjson_unused bool has_value; - SIMDJSON_TRY( iter.start_array().get(has_value) ); - return array(iter); +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. + iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline simdjson_result array::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lsx::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for lsx */ +/* including simdjson/generic/ondemand/array_iterator-inl.h for lsx: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. + if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lsx::ondemand::array_iterator &&value +) noexcept + : lsx::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : lsx::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for lsx */ +/* including simdjson/generic/ondemand/value-inl.h for lsx: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result value::get_object() noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +template +simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { + return iter.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} + +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + + +template<> simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } +template<> simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } +template<> simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } +template<> simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } +template<> simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } +template<> simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } +template<> simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } +template<> simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } +template<> simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } + +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline value::operator T() noexcept(false) { + return get(); +} +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); } -simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { - simdjson_unused bool has_value; - SIMDJSON_TRY( iter.start_root_array().get(has_value) ); - return array(iter); +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); } -simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { - bool has_value; - SIMDJSON_TRY(iter.started_array().get(has_value)); - return array(iter); + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); } -simdjson_inline simdjson_result array::begin() noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - return array_iterator(iter); +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; } -simdjson_inline simdjson_result array::end() noexcept { - return array_iterator(iter); +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; } -simdjson_inline error_code array::consume() noexcept { - auto error = iter.json_iter().skip_child(iter.depth()-1); - if(error) { iter.abandon(); } - return error; + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); } -simdjson_inline simdjson_result array::raw_json() noexcept { - const uint8_t * starting_point{iter.peek_start()}; - auto error = consume(); +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); if(error) { return error; } - // After 'consume()', we could be left pointing just beyond the document, but that - // is ok because we are not going to dereference the final pointer position, we just - // use it to compute the length in bytes. - const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); + return ! ((this_type == json_type::array) || (this_type == json_type::object)); } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline simdjson_result array::count_elements() & noexcept { - size_t count{0}; - // Important: we do not consume any of the values. - for(simdjson_unused auto v : *this) { count++; } - // The above loop will always succeed, but we want to report errors. - if(iter.error()) { return iter.error(); } - // We need to move back at the start because we expect users to iterate through - // the array after counting the number of elements. - iter.reset_array(); - return count; +simdjson_inline simdjson_result value::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); } -SIMDJSON_POP_DISABLE_WARNINGS -simdjson_inline simdjson_result array::is_empty() & noexcept { - bool is_not_empty; - auto error = iter.reset_array().get(is_not_empty); - if(error) { return error; } - return !is_not_empty; + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); } -inline simdjson_result array::reset() & noexcept { - return iter.reset_array(); +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); } -inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { - if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } - json_pointer = json_pointer.substr(1); - // - means "the append position" or "the element after the end of the array" - // We don't support this, because we're returning a real element, not a position. - if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} - // Read the array index - size_t array_index = 0; - size_t i; - for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { - uint8_t digit = uint8_t(json_pointer[i] - '0'); - // Check for non-digit in array index. If it's there, we're trying to get a field in an object - if (digit > 9) { return INCORRECT_TYPE; } - array_index = array_index*10 + digit; +simdjson_inline simdjson_result value::raw_json() noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: { + ondemand::array array; + SIMDJSON_TRY(get_array().get(array)); + return array.raw_json(); + } + case json_type::object: { + ondemand::object object; + SIMDJSON_TRY(get_object().get(object)); + return object.raw_json(); + } + default: + return raw_json_token(); } +} - // 0 followed by other digits is invalid - if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} - // Empty string is invalid; so is a "/" with no digits before it - if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" - // Get the child - auto child = at(array_index); - // If there is an error, it ends here - if(child.error()) { - return child; +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + return false; } + return true; +} - // If there is a /, we're not done yet, call recursively. - if (i < json_pointer.length()) { - child = child.at_pointer(json_pointer.substr(i)); +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } + return INVALID_JSON_POINTER; } - return child; } -simdjson_inline simdjson_result array::at(size_t index) noexcept { - size_t i = 0; - for (auto value : *this) { - if (i == index) { return value; } - i++; +simdjson_inline simdjson_result value::at_path(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; } - return INDEX_OUT_OF_BOUNDS; } } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - ppc64::ondemand::array &&value -) noexcept - : implementation_simdjson_result_base( - std::forward(value) +simdjson_inline simdjson_result::simdjson_result( + lsx::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) ) { } -simdjson_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error -) noexcept - : implementation_simdjson_result_base(error) +) noexcept : + implementation_simdjson_result_base(error) { } - -simdjson_inline simdjson_result simdjson_result::begin() noexcept { +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { if (error()) { return error(); } - return first.end(); + return {}; } -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { if (error()) { return error(); } - return first.count_elements(); + return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { if (error()) { return error(); } - return first.is_empty(); + return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { if (error()) { return error(); } - return first.at(index); + return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { if (error()) { return error(); } - return first.at_pointer(json_pointer); + return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { if (error()) { return error(); } - return first.raw_json(); + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; } -} // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H -/* end file simdjson/generic/ondemand/array-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/array_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/array_iterator-inl.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +template<> simdjson_inline error_code simdjson_result::get(lsx::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} -namespace simdjson { -namespace ppc64 { -namespace ondemand { +template simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} -simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept - : iter{_iter} -{} +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} -simdjson_inline simdjson_result array_iterator::operator*() noexcept { - if (iter.error()) { iter.abandon(); return iter.error(); } - return value(iter.child()); +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); } -simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { - return !(*this != other); +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); } -simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { - return iter.is_open(); +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); } -simdjson_inline array_iterator &array_iterator::operator++() noexcept { - error_code error; - // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. - // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. - if (( error = iter.error() )) { return *this; } - if (( error = iter.skip_child() )) { return *this; } - if (( error = iter.has_next_element().error() )) { return *this; } - return *this; +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); } +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator lsx::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lsx::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lsx::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif -} // namespace ondemand -} // namespace ppc64 -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result( - ppc64::ondemand::array_iterator &&value -) noexcept - : ppc64::implementation_simdjson_result_base(std::forward(value)) -{ - first.iter.assert_is_valid(); +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); } -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : ppc64::implementation_simdjson_result_base({}, error) -{ + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); } -simdjson_inline simdjson_result simdjson_result::operator*() noexcept { +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } - return *first; + return first.current_location(); } -simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return !error(); } - return first == other.first; + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); } -simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return error(); } - return first != other.first; + +simdjson_inline simdjson_result simdjson_result::at_pointer( + std::string_view json_pointer) noexcept { + if (error()) { + return error(); + } + return first.at_pointer(json_pointer); } -simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { - // Clear the error if there is one, so we don't yield it twice - if (error()) { second = SUCCESS; return *this; } - ++(first); - return *this; + +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); } } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/array_iterator-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/document-inl.h for ppc64: #include "simdjson/generic/ondemand/document-inl.h" */ -/* begin file simdjson/generic/ondemand/document-inl.h for ppc64 */ +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for lsx */ +/* including simdjson/generic/ondemand/document-inl.h for lsx: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept @@ -73060,22 +107448,29 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } - -template simdjson_inline error_code document::get(T &out) & noexcept { - return get().get(out); -} -template simdjson_inline error_code document::get(T &out) && noexcept { - return std::forward(*this).get().get(out); -} +template<> simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } +template<> simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } +template<> simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } +template<> simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } +template<> simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } +template<> simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } +template<> simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } +template<> simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } +template<> simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } + +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } #if SIMDJSON_EXCEPTIONS +template +simdjson_deprecated simdjson_inline document::operator T() && noexcept(false) { return get(); } +template +simdjson_inline document::operator T() & noexcept(false) { return get(); } simdjson_inline document::operator array() & noexcept(false) { return get_array(); } simdjson_inline document::operator object() & noexcept(false) { return get_object(); } simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } @@ -73132,7 +107527,14 @@ simdjson_inline simdjson_result document::operator[](const char *key) & n } simdjson_inline error_code document::consume() noexcept { - auto error = iter.skip_child(0); + bool scalar = false; + auto error = is_scalar().get(scalar); + if(error) { return error; } + if(scalar) { + iter.return_current_and_advance(); + return SUCCESS; + } + error = iter.skip_child(0); if(error) { iter.abandon(); } return error; } @@ -73154,12 +107556,21 @@ simdjson_inline simdjson_result document::type() noexcept { } simdjson_inline simdjson_result document::is_scalar() noexcept { + // For more speed, we could do: + // return iter.is_single_token(); json_type this_type; auto error = type().get(this_type); if(error) { return error; } return ! ((this_type == json_type::array) || (this_type == json_type::object)); } +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + simdjson_inline bool document::is_negative() noexcept { return get_root_value_iterator().is_root_negative(); } @@ -73179,7 +107590,7 @@ simdjson_inline simdjson_result document::get_number() noexcept { simdjson_inline simdjson_result document::raw_json_token() noexcept { auto _iter = get_root_value_iterator(); - return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); } simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { @@ -73200,275 +107611,305 @@ simdjson_inline simdjson_result document::at_pointer(std::string_view jso } } +simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - ppc64::ondemand::document &&value +simdjson_inline simdjson_result::simdjson_result( + lsx::ondemand::document &&value ) noexcept : - implementation_simdjson_result_base( - std::forward(value) + implementation_simdjson_result_base( + std::forward(value) ) { } -simdjson_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : - implementation_simdjson_result_base( + implementation_simdjson_result_base( error ) { } -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_inline error_code simdjson_result::rewind() noexcept { +simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { +simdjson_inline simdjson_result simdjson_result::get() & noexcept { if (error()) { return error(); } return first.get(); } template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } - return std::forward(first).get(); + return std::forward(first).get(); } template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { if (error()) { return error(); } return first.get(out); } template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { if (error()) { return error(); } - return std::forward(first).get(out); + return std::forward(first).get(out); } -template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } - return std::forward(first); + return std::forward(first); } -template<> simdjson_inline error_code simdjson_result::get(ppc64::ondemand::document &out) & noexcept = delete; -template<> simdjson_inline error_code simdjson_result::get(ppc64::ondemand::document &out) && noexcept { +template<> simdjson_inline error_code simdjson_result::get(lsx::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code simdjson_result::get(lsx::ondemand::document &out) && noexcept { if (error()) { return error(); } - out = std::forward(first); + out = std::forward(first); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::type() noexcept { +simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} -simdjson_inline bool simdjson_result::is_negative() noexcept { +simdjson_inline bool simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS -simdjson_inline simdjson_result::operator ppc64::ondemand::array() & noexcept(false) { +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator ppc64::ondemand::object() & noexcept(false) { +simdjson_inline simdjson_result::operator lsx::ondemand::array() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { +simdjson_inline simdjson_result::operator lsx::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator double() noexcept(false) { +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { +simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator ppc64::ondemand::raw_json_string() noexcept(false) { +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator bool() noexcept(false) { +simdjson_inline simdjson_result::operator lsx::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator ppc64::ondemand::value() noexcept(false) { +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lsx::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } -simdjson_inline bool simdjson_result::at_end() const noexcept { +simdjson_inline bool simdjson_result::at_end() const noexcept { if (error()) { return error(); } return first.at_end(); } -simdjson_inline int32_t simdjson_result::current_depth() const noexcept { +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { if (error()) { return error(); } return first.current_depth(); } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} } // namespace simdjson namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} @@ -73504,15 +107945,25 @@ simdjson_inline simdjson_result document_reference::get_raw_jso simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } - +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } #if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } -simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } #endif @@ -73529,6 +107980,7 @@ simdjson_inline simdjson_result document_reference::find_field_unordered( simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } @@ -73537,214 +107989,264 @@ simdjson_inline simdjson_result document_reference::get_number_type simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} simdjson_inline document_reference::operator document&() const noexcept { return *doc; } } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::document_reference value, error_code error) - noexcept : implementation_simdjson_result_base(std::forward(value), error) {} +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_inline error_code simdjson_result::rewind() noexcept { +simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } -simdjson_inline simdjson_result simdjson_result::type() noexcept { +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } -simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +template <> +simdjson_inline error_code simdjson_result::get(lsx::ondemand::document_reference &out) & noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +template <> +simdjson_inline error_code simdjson_result::get(lsx::ondemand::document_reference &out) && noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS -simdjson_inline simdjson_result::operator ppc64::ondemand::array() & noexcept(false) { +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator lsx::ondemand::array() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator ppc64::ondemand::object() & noexcept(false) { +simdjson_inline simdjson_result::operator lsx::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator double() noexcept(false) { +simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator ppc64::ondemand::raw_json_string() noexcept(false) { +simdjson_inline simdjson_result::operator lsx::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator bool() noexcept(false) { +simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator ppc64::ondemand::value() noexcept(false) { +simdjson_inline simdjson_result::operator lsx::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H -/* end file simdjson/generic/ondemand/document-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/document_stream-inl.h for ppc64: #include "simdjson/generic/ondemand/document_stream-inl.h" */ -/* begin file simdjson/generic/ondemand/document_stream-inl.h for ppc64 */ +/* end file simdjson/generic/ondemand/document-inl.h for lsx */ +/* including simdjson/generic/ondemand/document_stream-inl.h for lsx: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -73759,7 +108261,7 @@ simdjson_inline simdjson_result simdjson_result namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED @@ -73899,7 +108401,6 @@ simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bo } simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { - //if(stream->error) { return stream->error; } return simdjson_result(stream->doc, stream->error); } @@ -74089,9 +108590,18 @@ simdjson_inline std::string_view document_stream::iterator::source() const noexc depth--; break; default: // Scalar value document - // TODO: Remove any trailing whitespaces + // TODO: We could remove trailing whitespaces // This returns a string spanning from start of value to the beginning of the next document (excluded) - return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } } cur_struct_index++; } @@ -74146,22 +108656,22 @@ inline void document_stream::start_stage1_thread() noexcept { #endif // SIMDJSON_THREADS_ENABLED } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : - implementation_simdjson_result_base(error) + implementation_simdjson_result_base(error) { } -simdjson_inline simdjson_result::simdjson_result( - ppc64::ondemand::document_stream &&value +simdjson_inline simdjson_result::simdjson_result( + lsx::ondemand::document_stream &&value ) noexcept : - implementation_simdjson_result_base( - std::forward(value) + implementation_simdjson_result_base( + std::forward(value) ) { } @@ -74169,9 +108679,9 @@ simdjson_inline simdjson_result::simdjson_resu } #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H -/* end file simdjson/generic/ondemand/document_stream-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/field-inl.h for ppc64: #include "simdjson/generic/ondemand/field-inl.h" */ -/* begin file simdjson/generic/ondemand/field-inl.h for ppc64 */ +/* end file simdjson/generic/ondemand/document_stream-inl.h for lsx */ +/* including simdjson/generic/ondemand/field-inl.h for lsx: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -74183,10 +108693,10 @@ simdjson_inline simdjson_result::simdjson_resu /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { -// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit +// clang 6 does not think the default constructor can be noexcept, so we make it explicit simdjson_inline field::field() noexcept : std::pair() {} simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept @@ -74212,11 +108722,32 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. return first; } + +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +} + +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); +} + simdjson_inline value &field::value() & noexcept { return second; } @@ -74226,35 +108757,53 @@ simdjson_inline value field::value() && noexcept { } } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - ppc64::ondemand::field &&value +simdjson_inline simdjson_result::simdjson_result( + lsx::ondemand::field &&value ) noexcept : - implementation_simdjson_result_base( - std::forward(value) + implementation_simdjson_result_base( + std::forward(value) ) { } -simdjson_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : - implementation_simdjson_result_base(error) + implementation_simdjson_result_base(error) { } -simdjson_inline simdjson_result simdjson_result::key() noexcept { +simdjson_inline simdjson_result simdjson_result::key() noexcept { if (error()) { return error(); } return first.key(); } -simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { + if (error()) { return error(); } + return first.key_raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { + if (error()) { return error(); } + return first.escaped_key(); +} + +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { if (error()) { return error(); } return first.unescaped_key(allow_replacement); } -simdjson_inline simdjson_result simdjson_result::value() noexcept { + +template +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + +simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); } @@ -74262,9 +108811,9 @@ simdjson_inline simdjson_result simdjson_result simdjson_resultimplementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + inline void json_iterator::rewind() noexcept { token.set_position( root_position() ); logger::log_headers(); // We start again @@ -74553,6 +109119,12 @@ simdjson_inline uint32_t json_iterator::peek_length(token_position position) con #endif // SIMDJSON_CHECK_EOF return token.peek_length(position); } +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); +} simdjson_inline token_position json_iterator::last_position() const noexcept { // The following line fails under some compilers... @@ -74598,11 +109170,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -74661,22 +109245,22 @@ simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const ui } } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::json_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/json_iterator-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/json_type-inl.h for ppc64: #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* begin file simdjson/generic/ondemand/json_type-inl.h for ppc64 */ +/* end file simdjson/generic/ondemand/json_iterator-inl.h for lsx */ +/* including simdjson/generic/ondemand/json_type-inl.h for lsx: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -74687,7 +109271,7 @@ simdjson_inline simdjson_result::simdjson_result /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { @@ -74727,7 +109311,6 @@ simdjson_inline number::operator uint64_t() const noexcept { return get_uint64(); } - simdjson_inline bool number::is_int64() const noexcept { return get_number_type() == number_type::signed_integer; } @@ -74782,22 +109365,22 @@ simdjson_inline void number::skip_double() noexcept { } } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::json_type &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H -/* end file simdjson/generic/ondemand/json_type-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/logger-inl.h for ppc64: #include "simdjson/generic/ondemand/logger-inl.h" */ -/* begin file simdjson/generic/ondemand/logger-inl.h for ppc64 */ +/* end file simdjson/generic/ondemand/json_type-inl.h for lsx */ +/* including simdjson/generic/ondemand/logger-inl.h for lsx: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -74812,7 +109395,7 @@ simdjson_inline simdjson_result::simdjson_result(err #include namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { namespace logger { @@ -75019,13 +109602,13 @@ inline void log_line(const json_iterator &iter, token_position index, depth_t de } // namespace logger } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H -/* end file simdjson/generic/ondemand/logger-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/object-inl.h for ppc64: #include "simdjson/generic/ondemand/object-inl.h" */ -/* begin file simdjson/generic/ondemand/object-inl.h for ppc64 */ +/* end file simdjson/generic/ondemand/logger-inl.h for lsx */ +/* including simdjson/generic/ondemand/object-inl.h for lsx: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -75040,7 +109623,7 @@ inline void log_line(const json_iterator &iter, token_position index, depth_t de /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { @@ -75192,6 +109775,14 @@ inline simdjson_result object::at_pointer(std::string_view json_pointer) return child; } +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); +} + simdjson_inline simdjson_result object::count_fields() & noexcept { size_t count{0}; // Important: we do not consume any of the values. @@ -75216,79 +109807,87 @@ simdjson_inline simdjson_result object::reset() & noexcept { } } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::object &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -simdjson_inline simdjson_result simdjson_result::begin() noexcept { +simdjson_inline simdjson_result simdjson_result::begin() noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() noexcept { +simdjson_inline simdjson_result simdjson_result::end() noexcept { if (error()) { return error(); } return first.end(); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { if (error()) { return error(); } - return std::forward(first).find_field_unordered(key); + return std::forward(first).find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { if (error()) { return error(); } - return std::forward(first)[key]; + return std::forward(first)[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { if (error()) { return error(); } - return std::forward(first).find_field(key); + return std::forward(first).find_field(key); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } -inline simdjson_result simdjson_result::reset() noexcept { +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} + +inline simdjson_result simdjson_result::reset() noexcept { if (error()) { return error(); } return first.reset(); } -inline simdjson_result simdjson_result::is_empty() noexcept { +inline simdjson_result simdjson_result::is_empty() noexcept { if (error()) { return error(); } return first.is_empty(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H -/* end file simdjson/generic/ondemand/object-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/object_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/object_iterator-inl.h for ppc64 */ +/* end file simdjson/generic/ondemand/object-inl.h for lsx */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for lsx: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -75300,7 +109899,7 @@ simdjson_inline simdjson_result simdjson_result::simdjson_result( - ppc64::ondemand::object_iterator &&value +simdjson_inline simdjson_result::simdjson_result( + lsx::ondemand::object_iterator &&value ) noexcept - : implementation_simdjson_result_base(std::forward(value)) + : implementation_simdjson_result_base(std::forward(value)) { first.iter.assert_is_valid(); } -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base({}, error) +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) { } -simdjson_inline simdjson_result simdjson_result::operator*() noexcept { +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { if (error()) { return error(); } return *first; } // If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return !error(); } return first == other.first; } // If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return error(); } return first != other.first; } // Checks for ']' and ',' -simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { // Clear the error if there is one, so we don't yield it twice if (error()) { second = SUCCESS; return *this; } ++first; @@ -75427,9 +110026,9 @@ simdjson_inline simdjson_result &simdjson_resu } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/object_iterator-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/parser-inl.h for ppc64: #include "simdjson/generic/ondemand/parser-inl.h" */ -/* begin file simdjson/generic/ondemand/parser-inl.h for ppc64 */ +/* end file simdjson/generic/ondemand/object_iterator-inl.h for lsx */ +/* including simdjson/generic/ondemand/parser-inl.h for lsx: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -75446,7 +110045,7 @@ simdjson_inline simdjson_result &simdjson_resu /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { simdjson_inline parser::parser(size_t max_capacity) noexcept @@ -75474,6 +110073,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } @@ -75490,6 +110094,27 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p return document::start({ reinterpret_cast(json.data()), this }); } +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); +} +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } @@ -75561,13 +110186,13 @@ inline simdjson_result parser::iterate_many(const padded_string return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } -simdjson_inline size_t parser::capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } -simdjson_inline size_t parser::max_capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } -simdjson_inline size_t parser::max_depth() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { return _max_depth; } @@ -75596,22 +110221,22 @@ simdjson_inline simdjson_warn_unused simdjson_result parser::u } } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::parser &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H -/* end file simdjson/generic/ondemand/parser-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/raw_json_string-inl.h for ppc64: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ -/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for ppc64 */ +/* end file simdjson/generic/ondemand/parser-inl.h for lsx */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for lsx: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -75624,46 +110249,49 @@ simdjson_inline simdjson_result::simdjson_result(error_ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } - -simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { - size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;pos < target.size() && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;pos < target.size();pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;target[pos] && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;target[pos];pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } } + pos++; } return true; } @@ -75675,7 +110303,7 @@ simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string } simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and + // Assumptions: does not contain unescaped quote characters("), and // the raw content is quote terminated within a valid JSON string. if(target.size() <= SIMDJSON_PADDING) { return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); @@ -75790,34 +110418,34 @@ simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, cons } } // namespace ondemand -} // namespace ppc64 +} // namespace lsx } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::raw_json_string &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -simdjson_inline simdjson_result simdjson_result::raw() const noexcept { +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { if (error()) { return error(); } return first.raw(); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(ppc64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(lsx::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { if (error()) { return error(); } return first.unescape(iter, allow_replacement); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(ppc64::ondemand::json_iterator &iter) const noexcept { +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(lsx::ondemand::json_iterator &iter) const noexcept { if (error()) { return error(); } return first.unescape_wobbly(iter); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H -/* end file simdjson/generic/ondemand/raw_json_string-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/serialization-inl.h for ppc64: #include "simdjson/generic/ondemand/serialization-inl.h" */ -/* begin file simdjson/generic/ondemand/serialization-inl.h for ppc64 */ +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for lsx */ +/* including simdjson/generic/ondemand/serialization-inl.h for lsx: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -75844,43 +110472,43 @@ inline std::string_view trim(const std::string_view str) noexcept { } -inline simdjson_result to_json_string(ppc64::ondemand::document& x) noexcept { +inline simdjson_result to_json_string(lsx::ondemand::document& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } -inline simdjson_result to_json_string(ppc64::ondemand::document_reference& x) noexcept { +inline simdjson_result to_json_string(lsx::ondemand::document_reference& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } -inline simdjson_result to_json_string(ppc64::ondemand::value& x) noexcept { +inline simdjson_result to_json_string(lsx::ondemand::value& x) noexcept { /** * If we somehow receive a value that has already been consumed, * then the following code could be in trouble. E.g., we create * an array as needed, but if an array was already created, then * it could be bad. */ - using namespace ppc64::ondemand; - ppc64::ondemand::json_type t; + using namespace lsx::ondemand; + lsx::ondemand::json_type t; auto error = x.type().get(t); if(error != SUCCESS) { return error; } switch (t) { case json_type::array: { - ppc64::ondemand::array array; + lsx::ondemand::array array; error = x.get_array().get(array); if(error) { return error; } return to_json_string(array); } case json_type::object: { - ppc64::ondemand::object object; + lsx::ondemand::object object; error = x.get_object().get(object); if(error) { return error; } return to_json_string(object); @@ -75890,50 +110518,50 @@ inline simdjson_result to_json_string(ppc64::ondemand::value& } } -inline simdjson_result to_json_string(ppc64::ondemand::object& x) noexcept { +inline simdjson_result to_json_string(lsx::ondemand::object& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } -inline simdjson_result to_json_string(ppc64::ondemand::array& x) noexcept { +inline simdjson_result to_json_string(lsx::ondemand::array& x) noexcept { std::string_view v; auto error = x.raw_json().get(v); if(error) {return error; } return trim(v); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } -inline simdjson_result to_json_string(simdjson_result x) { +inline simdjson_result to_json_string(simdjson_result x) { if (x.error()) { return x.error(); } return to_json_string(x.value_unsafe()); } } // namespace simdjson -namespace simdjson { namespace ppc64 { namespace ondemand { +namespace simdjson { namespace lsx { namespace ondemand { #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::value x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::value x) { std::string_view v; auto error = simdjson::to_json_string(x).get(v); if(error == simdjson::SUCCESS) { @@ -75942,12 +110570,12 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::va throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else -inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::value x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::value x) { std::string_view v; auto error = simdjson::to_json_string(x).get(v); if(error == simdjson::SUCCESS) { @@ -75959,7 +110587,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::va #endif #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::array value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::array value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -75968,12 +110596,12 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::ar throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else -inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::array value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::array value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -75985,7 +110613,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::ar #endif #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document& value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::document& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -75994,7 +110622,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::do throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document_reference& value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::document_reference& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -76003,16 +110631,16 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::do throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else -inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document& value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::document& value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -76024,7 +110652,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::do #endif #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::object value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::object value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -76033,12 +110661,12 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::ob throw simdjson::simdjson_error(error); } } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { if (x.error()) { throw simdjson::simdjson_error(x.error()); } return (out << x.value()); } #else -inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::object value) { +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::object value) { std::string_view v; auto error = simdjson::to_json_string(value).get(v); if(error == simdjson::SUCCESS) { @@ -76048,12 +110676,12 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::ob } } #endif -}}} // namespace simdjson::ppc64::ondemand +}}} // namespace simdjson::lsx::ondemand #endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H -/* end file simdjson/generic/ondemand/serialization-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/token_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/token_iterator-inl.h for ppc64 */ +/* end file simdjson/generic/ondemand/serialization-inl.h for lsx */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for lsx: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -76064,7 +110692,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::ob /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { +namespace lsx { namespace ondemand { simdjson_inline token_iterator::token_iterator( @@ -76075,553 +110703,82 @@ simdjson_inline token_iterator::token_iterator( } simdjson_inline uint32_t token_iterator::current_offset() const noexcept { - return *(_position); -} - - -simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { - return &buf[*(_position++)]; -} - -simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { - return &buf[*position]; -} -simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { - return *position; -} -simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { - return *(position+1) - *position; -} - -simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { - return &buf[*(_position+delta)]; -} -simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { - return *(_position+delta); -} -simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { - return *(_position+delta+1) - *(_position+delta); -} - -simdjson_inline token_position token_iterator::position() const noexcept { - return _position; -} -simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { - _position = target_position; -} - -simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { - return _position == other._position; -} -simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { - return _position != other._position; -} -simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { - return _position > other._position; -} -simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { - return _position >= other._position; -} -simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { - return _position < other._position; -} -simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { - return _position <= other._position; -} - -} // namespace ondemand -} // namespace ppc64 -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::token_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/token_iterator-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/value-inl.h for ppc64: #include "simdjson/generic/ondemand/value-inl.h" */ -/* begin file simdjson/generic/ondemand/value-inl.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace ppc64 { -namespace ondemand { - -simdjson_inline value::value(const value_iterator &_iter) noexcept - : iter{_iter} -{ -} -simdjson_inline value value::start(const value_iterator &iter) noexcept { - return iter; -} -simdjson_inline value value::resume(const value_iterator &iter) noexcept { - return iter; -} - -simdjson_inline simdjson_result value::get_array() noexcept { - return array::start(iter); -} -simdjson_inline simdjson_result value::get_object() noexcept { - return object::start(iter); -} -simdjson_inline simdjson_result value::start_or_resume_object() noexcept { - if (iter.at_start()) { - return get_object(); - } else { - return object::resume(iter); - } -} - -simdjson_inline simdjson_result value::get_raw_json_string() noexcept { - return iter.get_raw_json_string(); -} -simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { - return iter.get_string(allow_replacement); -} -template -simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { - return iter.get_string(receiver, allow_replacement); -} -simdjson_inline simdjson_result value::get_wobbly_string() noexcept { - return iter.get_wobbly_string(); -} -simdjson_inline simdjson_result value::get_double() noexcept { - return iter.get_double(); -} -simdjson_inline simdjson_result value::get_double_in_string() noexcept { - return iter.get_double_in_string(); -} -simdjson_inline simdjson_result value::get_uint64() noexcept { - return iter.get_uint64(); -} -simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { - return iter.get_uint64_in_string(); -} -simdjson_inline simdjson_result value::get_int64() noexcept { - return iter.get_int64(); -} -simdjson_inline simdjson_result value::get_int64_in_string() noexcept { - return iter.get_int64_in_string(); -} -simdjson_inline simdjson_result value::get_bool() noexcept { - return iter.get_bool(); -} -simdjson_inline simdjson_result value::is_null() noexcept { - return iter.is_null(); -} -template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } - -template simdjson_inline error_code value::get(T &out) noexcept { - return get().get(out); -} - -#if SIMDJSON_EXCEPTIONS -simdjson_inline value::operator array() noexcept(false) { - return get_array(); -} -simdjson_inline value::operator object() noexcept(false) { - return get_object(); -} -simdjson_inline value::operator uint64_t() noexcept(false) { - return get_uint64(); -} -simdjson_inline value::operator int64_t() noexcept(false) { - return get_int64(); -} -simdjson_inline value::operator double() noexcept(false) { - return get_double(); -} -simdjson_inline value::operator std::string_view() noexcept(false) { - return get_string(false); -} -simdjson_inline value::operator raw_json_string() noexcept(false) { - return get_raw_json_string(); -} -simdjson_inline value::operator bool() noexcept(false) { - return get_bool(); -} -#endif - -simdjson_inline simdjson_result value::begin() & noexcept { - return get_array().begin(); -} -simdjson_inline simdjson_result value::end() & noexcept { - return {}; -} -simdjson_inline simdjson_result value::count_elements() & noexcept { - simdjson_result answer; - auto a = get_array(); - answer = a.count_elements(); - // count_elements leaves you pointing inside the array, at the first element. - // We need to move back so that the user can create a new array (which requires that - // we point at '['). - iter.move_at_start(); - return answer; -} -simdjson_inline simdjson_result value::count_fields() & noexcept { - simdjson_result answer; - auto a = get_object(); - answer = a.count_fields(); - iter.move_at_start(); - return answer; -} -simdjson_inline simdjson_result value::at(size_t index) noexcept { - auto a = get_array(); - return a.at(index); -} - -simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { - return start_or_resume_object().find_field(key); -} -simdjson_inline simdjson_result value::find_field(const char *key) noexcept { - return start_or_resume_object().find_field(key); -} - -simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { - return start_or_resume_object().find_field_unordered(key); -} -simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { - return start_or_resume_object().find_field_unordered(key); -} - -simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { - return start_or_resume_object()[key]; -} -simdjson_inline simdjson_result value::operator[](const char *key) noexcept { - return start_or_resume_object()[key]; -} - -simdjson_inline simdjson_result value::type() noexcept { - return iter.type(); -} - -simdjson_inline simdjson_result value::is_scalar() noexcept { - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return ! ((this_type == json_type::array) || (this_type == json_type::object)); -} - -simdjson_inline bool value::is_negative() noexcept { - return iter.is_negative(); -} - -simdjson_inline simdjson_result value::is_integer() noexcept { - return iter.is_integer(); -} -simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { - return iter.get_number_type(); -} -simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { - return iter.get_number(); -} - -simdjson_inline std::string_view value::raw_json_token() noexcept { - return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); -} - -simdjson_inline simdjson_result value::raw_json() noexcept { - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: { - ondemand::array array; - SIMDJSON_TRY(get_array().get(array)); - return array.raw_json(); - } - case json_type::object: { - ondemand::object object; - SIMDJSON_TRY(get_object().get(object)); - return object.raw_json(); - } - default: - return raw_json_token(); - } -} - -simdjson_inline simdjson_result value::current_location() noexcept { - return iter.json_iter().current_location(); -} - -simdjson_inline int32_t value::current_depth() const noexcept{ - return iter.json_iter().depth(); -} - -simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: - return (*this).get_array().at_pointer(json_pointer); - case json_type::object: - return (*this).get_object().at_pointer(json_pointer); - default: - return INVALID_JSON_POINTER; - } -} - -} // namespace ondemand -} // namespace ppc64 -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result( - ppc64::ondemand::value &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ -} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { - if (error()) { return error(); } - return first.count_elements(); -} -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} -simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { - if (error()) { return error(); } - return first.at(index); -} -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_inline simdjson_result simdjson_result::end() & noexcept { - if (error()) { return error(); } - return {}; -} - -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { - if (error()) { return error(); } - return first.find_field(key); -} - -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} - -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { - if (error()) { return error(); } - return first[key]; -} - -simdjson_inline simdjson_result simdjson_result::get_array() noexcept { - if (error()) { return error(); } - return first.get_array(); -} -simdjson_inline simdjson_result simdjson_result::get_object() noexcept { - if (error()) { return error(); } - return first.get_object(); -} -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return error(); } - return first.get_uint64(); -} -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { - if (error()) { return error(); } - return first.get_uint64_in_string(); -} -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); -} -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { - if (error()) { return error(); } - return first.get_int64_in_string(); -} -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); -} -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { - if (error()) { return error(); } - return first.get_double_in_string(); -} -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(allow_replacement); -} -template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(receiver, allow_replacement); -} -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { - if (error()) { return error(); } - return first.get_wobbly_string(); -} -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); -} -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); -} -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { - if (error()) { return error(); } - return first.is_null(); -} - -template simdjson_inline simdjson_result simdjson_result::get() noexcept { - if (error()) { return error(); } - return first.get(); -} -template simdjson_inline error_code simdjson_result::get(T &out) noexcept { - if (error()) { return error(); } - return first.get(out); -} - -template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { - if (error()) { return error(); } - return std::move(first); -} -template<> simdjson_inline error_code simdjson_result::get(ppc64::ondemand::value &out) noexcept { - if (error()) { return error(); } - out = first; - return SUCCESS; + return *(_position); } -simdjson_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; } -simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); + +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? + *(position+1) - *(position) + : *(position+2) - *(position); } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; } -#if SIMDJSON_EXCEPTIONS -simdjson_inline simdjson_result::operator ppc64::ondemand::array() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); } -simdjson_inline simdjson_result::operator ppc64::ondemand::object() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; } -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; } -simdjson_inline simdjson_result::operator ppc64::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; } -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; } -#endif - -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; } - -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { - if (error()) { return error(); } - return first.raw_json(); +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; } -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); -} +} // namespace ondemand +} // namespace lsx +} // namespace simdjson -simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { - if (error()) { return error(); } - return first.current_depth(); -} +namespace simdjson { -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); -} +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H -/* end file simdjson/generic/ondemand/value-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/value_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/value_iterator-inl.h for ppc64 */ +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/token_iterator-inl.h for lsx */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for lsx: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for lsx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -76630,13 +110787,13 @@ simdjson_inline simdjson_result simdjson_result value_iterator::find_ } else if (!is_open()) { #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. - // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif @@ -76873,7 +111030,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_ #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. - // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif @@ -77130,1270 +111287,681 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); // if we start with 'n', we must be a null - if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } - return is_null_string; -} - -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { - return get_raw_json_string().unescape(json_iter(), allow_replacement); -} -template -simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { - std::string_view content; - auto err = get_string(allow_replacement).get(content); - if (err) { return err; } - receiver = content; - return SUCCESS; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { - return get_raw_json_string().unescape_wobbly(json_iter()); -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { - auto json = peek_scalar("string"); - if (*json != '"') { return incorrect_type_error("Not a string"); } - advance_scalar("string"); - return raw_json_string(json+1); -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { - auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { - auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { - auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { - auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { - auto result = numberparsing::parse_double(peek_non_root_scalar("double")); - if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { - auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); - if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { - auto result = parse_bool(peek_non_root_scalar("bool")); - if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } - return result; -} -simdjson_inline simdjson_result value_iterator::is_null() noexcept { - bool is_null_value; - SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); - if(is_null_value) { advance_non_root_scalar("null"); } - return is_null_value; -} -simdjson_inline bool value_iterator::is_negative() noexcept { - return numberparsing::is_negative(peek_non_root_scalar("numbersign")); -} -simdjson_inline bool value_iterator::is_root_negative() noexcept { - return numberparsing::is_negative(peek_root_scalar("numbersign")); -} -simdjson_inline simdjson_result value_iterator::is_integer() noexcept { - return numberparsing::is_integer(peek_non_root_scalar("integer")); -} -simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { - return numberparsing::get_number_type(peek_non_root_scalar("integer")); -} -simdjson_inline simdjson_result value_iterator::get_number() noexcept { - number num; - error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); - if(error) { return error; } - return num; -} - -simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("is_root_integer"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - return false; // if there are more than 20 characters, it cannot be represented as an integer. - } - auto answer = numberparsing::is_integer(tmpbuf); - // If the parsing was a success, we must still check that it is - // a single scalar. Note that we parse first because of cases like '[]' where - // getting TRAILING_CONTENT is wrong. - if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } - return answer; -} - -simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("number"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; - } - auto answer = numberparsing::get_number_type(tmpbuf); - if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - return answer; -} -simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("number"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; - } - number num; - error_code error = numberparsing::parse_number(tmpbuf, num); - if(error) { return error; } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("number"); - return num; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { - return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); -} -template -simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { - std::string_view content; - auto err = get_root_string(check_trailing, allow_replacement).get(content); - if (err) { return err; } - receiver = content; - return SUCCESS; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { - return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { - auto json = peek_scalar("string"); - if (*json != '"') { return incorrect_type_error("Not a string"); } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_scalar("string"); - return raw_json_string(json+1); -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("uint64"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; - } - auto result = numberparsing::parse_unsigned(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("uint64"); - } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("uint64"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; - } - auto result = numberparsing::parse_unsigned_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("uint64"); - } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("int64"); - uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; - } - - auto result = numberparsing::parse_integer(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("int64"); - } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("int64"); - uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; - } - - auto result = numberparsing::parse_integer_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("int64"); - } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; - } - auto result = numberparsing::parse_double(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("double"); - } - return result; -} - -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; - } - auto result = numberparsing::parse_double_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("double"); - } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; -} -simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("null"); - bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && - (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); - if(result) { // we have something that looks like a null. - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("null"); - } - return result; -} - -simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); - - return _json_iter->skip_child(depth()); -} - -simdjson_inline value_iterator value_iterator::child() const noexcept { - assert_at_child(); - return { _json_iter, depth()+1, _json_iter->token.position() }; -} - -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is -// marked non-inline. -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline bool value_iterator::is_open() const noexcept { - return _json_iter->depth() >= depth(); -} -SIMDJSON_POP_DISABLE_WARNINGS - -simdjson_inline bool value_iterator::at_end() const noexcept { - return _json_iter->at_end(); -} - -simdjson_inline bool value_iterator::at_start() const noexcept { - return _json_iter->token.position() == start_position(); -} - -simdjson_inline bool value_iterator::at_first_field() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - return _json_iter->token.position() == start_position() + 1; -} - -simdjson_inline void value_iterator::abandon() noexcept { - _json_iter->abandon(); -} - -simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { - return _depth; -} -simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { - return _json_iter->error; -} -simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { - return _json_iter->string_buf_loc(); -} -simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { - return *_json_iter; -} -simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { - return *_json_iter; -} - -simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { - return _json_iter->peek(start_position()); -} -simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { - return _json_iter->peek_length(start_position()); -} - -simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { return peek_start(); } - - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - return _json_iter->peek(); -} - -simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { return; } - - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} - -simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { - logger::log_start_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - const uint8_t *json; - if (!is_at_start()) { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - json = peek_start(); - if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } - } else { - assert_at_start(); - /** - * We should be prudent. Let us peek. If it is not the right type, we - * return an error. Only once we have determined that we have the right - * type are we allowed to advance! - */ - json = _json_iter->peek(); - if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } - _json_iter->return_current_and_advance(); - } - - - return SUCCESS; -} - - -simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return peek_start(); } - - assert_at_root(); - return _json_iter->peek(); -} -simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return peek_start(); } - - assert_at_non_root_start(); - return _json_iter->peek(); -} - -simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return; } - - assert_at_root(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; } -simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return; } - assert_at_non_root_start(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); } - -simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { - logger::log_error(*_json_iter, start_position(), depth(), message); - return INCORRECT_TYPE; +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_string(allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; } - -simdjson_inline bool value_iterator::is_at_start() const noexcept { - return position() == start_position(); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); } - -simdjson_inline bool value_iterator::is_at_key() const noexcept { - // Keys are at the same depth as the object. - // Note here that we could be safer and check that we are within an object, - // but we do not. - return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); } - -simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { - // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). - auto delta = position() - start_position(); - return delta == 1 || delta == 2; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; } - -inline void value_iterator::assert_at_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; } - -inline void value_iterator::assert_at_container_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; } - -inline void value_iterator::assert_at_next() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; } - -simdjson_inline void value_iterator::move_at_start() noexcept { - _json_iter->_depth = _depth; - _json_iter->token.set_position(_start_position); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; } - -simdjson_inline void value_iterator::move_at_container_start() noexcept { - _json_iter->_depth = _depth; - _json_iter->token.set_position(_start_position + 1); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; } - -simdjson_inline simdjson_result value_iterator::reset_array() noexcept { - if(error()) { return error(); } - move_at_container_start(); - return started_array(); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; } - -simdjson_inline simdjson_result value_iterator::reset_object() noexcept { - if(error()) { return error(); } - move_at_container_start(); - return started_object(); +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; } - -inline void value_iterator::assert_at_child() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); - SIMDJSON_ASSUME( _depth > 0 ); +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); } - -inline void value_iterator::assert_at_root() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth == 1 ); +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); } - -inline void value_iterator::assert_at_non_root_start() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth > 1 ); +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return numberparsing::is_integer(peek_non_root_scalar("integer")); } - -inline void value_iterator::assert_is_valid() const noexcept { - SIMDJSON_ASSUME( _json_iter != nullptr ); +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); } - -simdjson_inline bool value_iterator::is_valid() const noexcept { - return _json_iter != nullptr; +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; } -simdjson_inline simdjson_result value_iterator::type() const noexcept { - switch (*peek_start()) { - case '{': - return json_type::object; - case '[': - return json_type::array; - case '"': - return json_type::string; - case 'n': - return json_type::null; - case 't': case 'f': - return json_type::boolean; - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return json_type::number; - default: - return TAPE_ERROR; +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; } -simdjson_inline token_position value_iterator::start_position() const noexcept { - return _start_position; +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; } - -simdjson_inline token_position value_iterator::position() const noexcept { - return _json_iter->position(); +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; } - -simdjson_inline token_position value_iterator::end_position() const noexcept { - return _json_iter->end_position(); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); } - -simdjson_inline token_position value_iterator::last_position() const noexcept { - return _json_iter->last_position(); +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_root_string(check_trailing, allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; } - -simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { - return _json_iter->report_error(error, message); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); } - -} // namespace ondemand -} // namespace ppc64 -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::value_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/value_iterator-inl.h for ppc64 */ -/* end file simdjson/generic/ondemand/amalgamated.h for ppc64 */ -/* including simdjson/ppc64/end.h: #include "simdjson/ppc64/end.h" */ -/* begin file simdjson/ppc64/end.h */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT -/* undefining SIMDJSON_IMPLEMENTATION from "ppc64" */ -#undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/ppc64/end.h */ - -#endif // SIMDJSON_PPC64_ONDEMAND_H -/* end file simdjson/ppc64/ondemand.h */ -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) -/* including simdjson/westmere/ondemand.h: #include "simdjson/westmere/ondemand.h" */ -/* begin file simdjson/westmere/ondemand.h */ -#ifndef SIMDJSON_WESTMERE_ONDEMAND_H -#define SIMDJSON_WESTMERE_ONDEMAND_H - -/* including simdjson/westmere/begin.h: #include "simdjson/westmere/begin.h" */ -/* begin file simdjson/westmere/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "westmere" */ -#define SIMDJSON_IMPLEMENTATION westmere -/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ -/* begin file simdjson/westmere/base.h */ -#ifndef SIMDJSON_WESTMERE_BASE_H -#define SIMDJSON_WESTMERE_BASE_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE -namespace simdjson { -/** - * Implementation for Westmere (Intel SSE4.2). - */ -namespace westmere { - -class implementation; - -namespace { -namespace simd { - -template struct simd8; -template struct simd8x64; - -} // namespace simd -} // unnamed namespace - -} // namespace westmere -} // namespace simdjson - -#endif // SIMDJSON_WESTMERE_BASE_H -/* end file simdjson/westmere/base.h */ -/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ -/* begin file simdjson/westmere/intrinsics.h */ -#ifndef SIMDJSON_WESTMERE_INTRINSICS_H -#define SIMDJSON_WESTMERE_INTRINSICS_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -#if SIMDJSON_VISUAL_STUDIO -// under clang within visual studio, this will include -#include // visual studio or clang -#else -#include // elsewhere -#endif // SIMDJSON_VISUAL_STUDIO - - -#if SIMDJSON_CLANG_VISUAL_STUDIO -/** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - */ -#include // for _mm_alignr_epi8 -#include // for _mm_clmulepi64_si128 -#endif - -static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); - -#endif // SIMDJSON_WESTMERE_INTRINSICS_H -/* end file simdjson/westmere/intrinsics.h */ - -#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE -SIMDJSON_TARGET_REGION("sse4.2,pclmul,popcnt") -#endif - -/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ -/* begin file simdjson/westmere/bitmanipulation.h */ -#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H -#define SIMDJSON_WESTMERE_BITMANIPULATION_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace westmere { -namespace { - -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -SIMDJSON_NO_SANITIZE_UNDEFINED -// This function can be used safely even if not all bytes have been -// initialized. -// See issue https://github.com/simdjson/simdjson/issues/1965 -SIMDJSON_NO_SANITIZE_MEMORY -simdjson_inline int trailing_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); } - -/* result might be undefined when input_num is zero */ -simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return input_num & (input_num-1); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; } - -/* result might be undefined when input_num is zero */ -simdjson_inline int leading_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } -#if SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows in this kernel - return __popcnt64(input_num);// Visual Studio wants two underscores -} -#else -simdjson_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; } -#endif +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } -simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); -#else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); -#endif + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; } - -} // unnamed namespace -} // namespace westmere -} // namespace simdjson - -#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H -/* end file simdjson/westmere/bitmanipulation.h */ -/* including simdjson/westmere/bitmask.h: #include "simdjson/westmere/bitmask.h" */ -/* begin file simdjson/westmere/bitmask.h */ -#ifndef SIMDJSON_WESTMERE_BITMASK_H -#define SIMDJSON_WESTMERE_BITMASK_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace westmere { -namespace { - -// -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. -// -// For example, prefix_xor(00100100) == 00011100 -// -simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { - // There should be no such thing with a processing supporting avx2 - // but not clmul. - __m128i all_ones = _mm_set1_epi8('\xFF'); - __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); - return _mm_cvtsi128_si64(result); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; } -} // unnamed namespace -} // namespace westmere -} // namespace simdjson - -#endif // SIMDJSON_WESTMERE_BITMASK_H -/* end file simdjson/westmere/bitmask.h */ -/* including simdjson/westmere/numberparsing_defs.h: #include "simdjson/westmere/numberparsing_defs.h" */ -/* begin file simdjson/westmere/numberparsing_defs.h */ -#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H -#define SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H - -/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ -/* begin file simdjson/westmere/base.h */ -#ifndef SIMDJSON_WESTMERE_BASE_H -#define SIMDJSON_WESTMERE_BASE_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE -namespace simdjson { -/** - * Implementation for Westmere (Intel SSE4.2). - */ -namespace westmere { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("bool"); + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; +} -class implementation; +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } else if (json[0] == 'n') { + return incorrect_type_error("Not a null but starts with n"); + } + return result; +} -namespace { -namespace simd { +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); -template struct simd8; -template struct simd8x64; + return _json_iter->skip_child(depth()); +} -} // namespace simd -} // unnamed namespace +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} -} // namespace westmere -} // namespace simdjson +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS -#endif // SIMDJSON_WESTMERE_BASE_H -/* end file simdjson/westmere/base.h */ -/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ -/* begin file simdjson/westmere/intrinsics.h */ -#ifndef SIMDJSON_WESTMERE_INTRINSICS_H -#define SIMDJSON_WESTMERE_INTRINSICS_H +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} -#if SIMDJSON_VISUAL_STUDIO -// under clang within visual studio, this will include -#include // visual studio or clang -#else -#include // elsewhere -#endif // SIMDJSON_VISUAL_STUDIO +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} -#if SIMDJSON_CLANG_VISUAL_STUDIO -/** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - */ -#include // for _mm_alignr_epi8 -#include // for _mm_clmulepi64_si128 -#endif +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} -static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); +} -#endif // SIMDJSON_WESTMERE_INTRINSICS_H -/* end file simdjson/westmere/intrinsics.h */ +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return peek_start(); } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} -namespace simdjson { -namespace westmere { -namespace numberparsing { +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } -/** @private */ -static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - // this actually computes *16* values so we are being wasteful. - const __m128i ascii0 = _mm_set1_epi8('0'); - const __m128i mul_1_10 = - _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); - const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); - const __m128i mul_1_10000 = - _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); - const __m128i input = _mm_sub_epi8( - _mm_loadu_si128(reinterpret_cast(chars)), ascii0); - const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); - const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); - const __m128i t3 = _mm_packus_epi32(t2, t2); - const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); - return _mm_cvtsi128_si32( - t4); // only captures the sum of the first 8 digits, drop the rest + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } -/** @private */ -simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { - internal::value128 answer; -#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); +simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif - return answer; -} - -} // namespace numberparsing -} // namespace westmere -} // namespace simdjson + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! + */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } -#define SIMDJSON_SWAR_NUMBER_PARSING 1 -#endif // SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H -/* end file simdjson/westmere/numberparsing_defs.h */ -/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ -/* begin file simdjson/westmere/simd.h */ -#ifndef SIMDJSON_WESTMERE_SIMD_H -#define SIMDJSON_WESTMERE_SIMD_H + return SUCCESS; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -namespace simdjson { -namespace westmere { -namespace { -namespace simd { +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } - template - struct base { - __m128i value; + assert_at_root(); + return _json_iter->peek(); +} +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } - // Zero constructor - simdjson_inline base() : value{__m128i()} {} + assert_at_non_root_start(); + return _json_iter->peek(); +} - // Conversion from SIMD register - simdjson_inline base(const __m128i _value) : value(_value) {} +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } - // Conversion to SIMD register - simdjson_inline operator const __m128i&() const { return this->value; } - simdjson_inline operator __m128i&() { return this->value; } + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } - // Bit operations - simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } - simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } - simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } - simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } - simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } - }; + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} - template> - struct base8: base> { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; +} - simdjson_inline base8() : base>() {} - simdjson_inline base8(const __m128i _value) : base>(_value) {} +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} - friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} - static const int SIZE = sizeof(base>::value); +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; +} - template - simdjson_inline simd8 prev(const simd8 prev_chunk) const { - return _mm_alignr_epi8(*this, prev_chunk, 16 - N); - } - }; +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} - // SIMD byte mask type (returned by things like eq and gt) - template<> - struct simd8: base8 { - static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m128i _value) : base8(_value) {} - // Splat constructor - simdjson_inline simd8(bool _value) : base8(splat(_value)) {} +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} - simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } - simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } - simdjson_inline simd8 operator~() const { return *this ^ true; } - }; +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); +} - template - struct base8_numeric: base8 { - static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } - static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } - static simdjson_inline simd8 load(const T values[16]) { - return _mm_loadu_si128(reinterpret_cast(values)); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_inline simd8 repeat_16( - T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); +} - simdjson_inline base8_numeric() : base8() {} - simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); +} - // Store to array - simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); +} - // Override to distinguish from bool version - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); +} - // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } - simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); +} - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) - template - simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm_shuffle_epi8(lookup_table, *this); - } +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). - // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes - // get written. - // Design consideration: it seems like a function with the - // signature simd8 compress(uint32_t mask) would be - // sensible, but the AVX ISA makes this kind of approach difficult. - template - simdjson_inline void compress(uint16_t mask, L * output) const { - using internal::thintable_epi8; - using internal::BitsSetTable256mul2; - using internal::pshufb_combine_table; - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); - // we increment by 0x08 the second half of the mask - shufmask = - _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); - // this is the version "nearly pruned" - __m128i pruned = _mm_shuffle_epi8(*this, shufmask); - // we still need to put the two halves together. - // we compute the popcount of the first half: - int pop1 = BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - __m128i compactmask = - _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); - __m128i answer = _mm_shuffle_epi8(pruned, compactmask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); - } +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); +} - template - simdjson_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, replace15 - )); - } - }; +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; +} - // Signed bytes - template<> - struct simd8 : base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) : simd8(_mm_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } +} - // Order-sensitive comparisons - simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } - simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } - simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } - }; +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} - // Unsigned bytes - template<> - struct simd8: base8_numeric { - simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) : simd8(_mm_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_inline static simd8 repeat_16( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} - // Saturated math - simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } - simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} - // Order-specific operations - simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} - // Bit-specific operations - simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } - simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } - simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } - simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } - simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } - template - simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } - template - simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } - // Get one of the bits and make a bitmask out of it. - // e.g. value.get_bit<7>() gets the high bit - template - simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } - }; +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} - template - struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; +} // namespace ondemand +} // namespace lsx +} // namespace simdjson - simd8x64(const simd8x64& o) = delete; // no copy allowed - simd8x64& operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed +namespace simdjson { - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} - simdjson_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr+sizeof(simd8)*0); - this->chunks[1].store(ptr+sizeof(simd8)*1); - this->chunks[2].store(ptr+sizeof(simd8)*2); - this->chunks[3].store(ptr+sizeof(simd8)*3); - } +} // namespace simdjson - simdjson_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); - } +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/value_iterator-inl.h for lsx */ - simdjson_inline uint64_t compress(uint64_t mask, T * output) const { - this->chunks[0].compress(uint16_t(mask), output); - this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); - this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); - this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); - return 64 - count_ones(mask); - } - simdjson_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); - uint64_t r1 = this->chunks[1].to_bitmask() ; - uint64_t r2 = this->chunks[2].to_bitmask() ; - uint64_t r3 = this->chunks[3].to_bitmask() ; - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); - } +/* end file simdjson/generic/ondemand/amalgamated.h for lsx */ +/* including simdjson/lsx/end.h: #include "simdjson/lsx/end.h" */ +/* begin file simdjson/lsx/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lsx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - simdjson_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] == mask, - this->chunks[1] == mask, - this->chunks[2] == mask, - this->chunks[3] == mask - ).to_bitmask(); - } +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "lsx" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/lsx/end.h */ + +#endif // SIMDJSON_LSX_ONDEMAND_H +/* end file simdjson/lsx/ondemand.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(lasx) +/* including simdjson/lasx/ondemand.h: #include "simdjson/lasx/ondemand.h" */ +/* begin file simdjson/lasx/ondemand.h */ +#ifndef SIMDJSON_LASX_ONDEMAND_H +#define SIMDJSON_LASX_ONDEMAND_H + +/* including simdjson/lasx/begin.h: #include "simdjson/lasx/begin.h" */ +/* begin file simdjson/lasx/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "lasx" */ +#define SIMDJSON_IMPLEMENTATION lasx +/* including simdjson/lasx/base.h: #include "simdjson/lasx/base.h" */ +/* begin file simdjson/lasx/base.h */ +#ifndef SIMDJSON_LASX_BASE_H +#define SIMDJSON_LASX_BASE_H - simdjson_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64( - this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1], - this->chunks[2] == other.chunks[2], - this->chunks[3] == other.chunks[3] - ).to_bitmask(); - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - simdjson_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] <= mask, - this->chunks[1] <= mask, - this->chunks[2] <= mask, - this->chunks[3] <= mask - ).to_bitmask(); - } - }; // struct simd8x64 +namespace simdjson { +/** + * Implementation for LASX. + */ +namespace lasx { +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; } // namespace simd } // unnamed namespace -} // namespace westmere + +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H -/* end file simdjson/westmere/simd.h */ -/* including simdjson/westmere/stringparsing_defs.h: #include "simdjson/westmere/stringparsing_defs.h" */ -/* begin file simdjson/westmere/stringparsing_defs.h */ -#ifndef SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H -#define SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +#endif // SIMDJSON_LASX_BASE_H +/* end file simdjson/lasx/base.h */ +/* including simdjson/lasx/intrinsics.h: #include "simdjson/lasx/intrinsics.h" */ +/* begin file simdjson/lasx/intrinsics.h */ +#ifndef SIMDJSON_LASX_INTRINSICS_H +#define SIMDJSON_LASX_INTRINSICS_H -/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ -/* begin file simdjson/westmere/bitmanipulation.h */ -#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H -#define SIMDJSON_WESTMERE_BITMANIPULATION_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for LoongArch ASX"); + +#endif // SIMDJSON_LASX_INTRINSICS_H +/* end file simdjson/lasx/intrinsics.h */ +/* including simdjson/lasx/bitmanipulation.h: #include "simdjson/lasx/bitmanipulation.h" */ +/* begin file simdjson/lasx/bitmanipulation.h */ +#ifndef SIMDJSON_LASX_BITMANIPULATION_H +#define SIMDJSON_LASX_BITMANIPULATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmask.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace { // We sometimes call trailing_zero on inputs that are zero, @@ -78405,15 +111973,7 @@ SIMDJSON_NO_SANITIZE_UNDEFINED // See issue https://github.com/simdjson/simdjson/issues/1965 SIMDJSON_NO_SANITIZE_MEMORY simdjson_inline int trailing_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; -#else // SIMDJSON_REGULAR_VISUAL_STUDIO return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO } /* result might be undefined when input_num is zero */ @@ -78423,126 +111983,204 @@ simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { /* result might be undefined when input_num is zero */ simdjson_inline int leading_zeroes(uint64_t input_num) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else return __builtin_clzll(input_num); -#endif// SIMDJSON_REGULAR_VISUAL_STUDIO } -#if SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows in this kernel - return __popcnt64(input_num);// Visual Studio wants two underscores -} -#else -simdjson_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return __lasx_xvpickve2gr_w(__lasx_xvpcnt_d(__m256i(v4u64{input_num, 0, 0, 0})), 0); } -#endif -simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#if SIMDJSON_REGULAR_VISUAL_STUDIO - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); -#else +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { return __builtin_uaddll_overflow(value1, value2, reinterpret_cast(result)); -#endif } } // unnamed namespace -} // namespace westmere +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H -/* end file simdjson/westmere/bitmanipulation.h */ -/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ -/* begin file simdjson/westmere/simd.h */ -#ifndef SIMDJSON_WESTMERE_SIMD_H -#define SIMDJSON_WESTMERE_SIMD_H +#endif // SIMDJSON_LASX_BITMANIPULATION_H +/* end file simdjson/lasx/bitmanipulation.h */ +/* including simdjson/lasx/bitmask.h: #include "simdjson/lasx/bitmask.h" */ +/* begin file simdjson/lasx/bitmask.h */ +#ifndef SIMDJSON_LASX_BITMASK_H +#define SIMDJSON_LASX_BITMASK_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace lasx +} // namespace simdjson + +#endif +/* end file simdjson/lasx/bitmask.h */ +/* including simdjson/lasx/numberparsing_defs.h: #include "simdjson/lasx/numberparsing_defs.h" */ +/* begin file simdjson/lasx/numberparsing_defs.h */ +#ifndef SIMDJSON_LASX_NUMBERPARSING_DEFS_H +#define SIMDJSON_LASX_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace lasx { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); + return answer; +} + +} // namespace numberparsing +} // namespace lasx +} // namespace simdjson + +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 +#else +#define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif +#endif + +#endif // SIMDJSON_LASX_NUMBERPARSING_DEFS_H +/* end file simdjson/lasx/numberparsing_defs.h */ +/* including simdjson/lasx/simd.h: #include "simdjson/lasx/simd.h" */ +/* begin file simdjson/lasx/simd.h */ +#ifndef SIMDJSON_LASX_SIMD_H +#define SIMDJSON_LASX_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmanipulation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace { namespace simd { + // Forward-declared so they can be used by splat and friends. template struct base { - __m128i value; + __m256i value; // Zero constructor - simdjson_inline base() : value{__m128i()} {} + simdjson_inline base() : value{__m256i()} {} // Conversion from SIMD register - simdjson_inline base(const __m128i _value) : value(_value) {} + simdjson_inline base(const __m256i _value) : value(_value) {} // Conversion to SIMD register - simdjson_inline operator const __m128i&() const { return this->value; } - simdjson_inline operator __m128i&() { return this->value; } + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + simdjson_inline operator const v32i8&() const { return (v32i8&)this->value; } + simdjson_inline operator v32i8&() { return (v32i8&)this->value; } // Bit operations - simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } - simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } - simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } - simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child operator|(const Child other) const { return __lasx_xvor_v(*this, other); } + simdjson_inline Child operator&(const Child other) const { return __lasx_xvand_v(*this, other); } + simdjson_inline Child operator^(const Child other) const { return __lasx_xvxor_v(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return __lasx_xvandn_v(other, *this); } simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } }; + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + template> struct base8: base> { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - simdjson_inline base8() : base>() {} - simdjson_inline base8(const __m128i _value) : base>(_value) {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} - friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return __lasx_xvseq_b(lhs, rhs); } static const int SIZE = sizeof(base>::value); template simdjson_inline simd8 prev(const simd8 prev_chunk) const { - return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + __m256i hi = __lasx_xvbsll_v(*this, N); + __m256i lo = __lasx_xvbsrl_v(*this, 16 - N); + __m256i tmp = __lasx_xvbsrl_v(prev_chunk, 16 - N); + lo = __lasx_xvpermi_q(lo, tmp, 0x21); + return __lasx_xvor_v(hi, lo); } }; // SIMD byte mask type (returned by things like eq and gt) template<> struct simd8: base8 { - static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + static simdjson_inline simd8 splat(bool _value) { return __lasx_xvreplgr2vr_b(uint8_t(-(!!_value))); } - simdjson_inline simd8() : base8() {} - simdjson_inline simd8(const __m128i _value) : base8(_value) {} + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} // Splat constructor - simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} - simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } - simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline int to_bitmask() const { + __m256i mask = __lasx_xvmskltz_b(*this); + return (__lasx_xvpickve2gr_w(mask, 4) << 16) | (__lasx_xvpickve2gr_w(mask, 0)); + } + simdjson_inline bool any() const { + __m256i v = __lasx_xvmsknz_b(*this); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } simdjson_inline simd8 operator~() const { return *this ^ true; } }; template struct base8_numeric: base8 { - static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } - static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } - static simdjson_inline simd8 load(const T values[16]) { - return _mm_loadu_si128(reinterpret_cast(values)); + static simdjson_inline simd8 splat(T _value) { + return __lasx_xvreplgr2vr_b(_value); + } + static simdjson_inline simd8 zero() { return __lasx_xvldi(0); } + static simdjson_inline simd8 load(const T values[32]) { + return __lasx_xvld(reinterpret_cast(values), 0); } // Repeat 16 values as many times as necessary (usually for lookup tables) static simdjson_inline simd8 repeat_16( @@ -78550,68 +112188,73 @@ namespace simd { T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 ) { return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } simdjson_inline base8_numeric() : base8() {} - simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} // Store to array - simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } - - // Override to distinguish from bool version - simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline void store(T dst[32]) const { + return __lasx_xvst(*this, reinterpret_cast<__m256i *>(dst), 0); + } // Addition/subtraction are the same for signed and unsigned - simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } - simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8 operator+(const simd8 other) const { return __lasx_xvadd_b(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return __lasx_xvsub_b(*this, other); } simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template simdjson_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm_shuffle_epi8(lookup_table, *this); + return __lasx_xvshuf_b(lookup_table, lookup_table, *this); } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes // get written. - // Design consideration: it seems like a function with the - // signature simd8 compress(uint32_t mask) would be - // sensible, but the AVX ISA makes this kind of approach difficult. template - simdjson_inline void compress(uint16_t mask, L * output) const { + simdjson_inline void compress(uint32_t mask, L * output) const { using internal::thintable_epi8; using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes + // this particular implementation was inspired by haswell + // lasx do it in 4 steps, first 8 bytes and then second 8 bytes... uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); - // we increment by 0x08 the second half of the mask - shufmask = - _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + uint8_t mask2 = uint8_t(mask >> 8); // second significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask{1,2,3,4}] + // into a 256-bit register. + __m256i shufmask = {int64_t(thintable_epi8[mask1]), int64_t(thintable_epi8[mask2]) + 0x0808080808080808, int64_t(thintable_epi8[mask3]), int64_t(thintable_epi8[mask4]) + 0x0808080808080808}; // this is the version "nearly pruned" - __m128i pruned = _mm_shuffle_epi8(*this, shufmask); - // we still need to put the two halves together. - // we compute the popcount of the first half: + __m256i pruned = __lasx_xvshuf_b(*this, *this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: int pop1 = BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - __m128i compactmask = - _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); - __m128i answer = _mm_shuffle_epi8(pruned, compactmask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + int pop2 = BitsSetTable256mul2[mask2]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + __m256i masklo = __lasx_xvldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop1 * 8); + __m256i maskhi = __lasx_xvldx(reinterpret_cast(reinterpret_cast(pshufb_combine_table)), pop3 * 8); + __m256i compactmask = __lasx_xvpermi_q(maskhi, masklo, 0x20); + __m256i answer = __lasx_xvshuf_b(pruned, pruned, compactmask); + __lasx_xvst(answer, reinterpret_cast(output), 0); + uint64_t value3 = __lasx_xvpickve2gr_du(answer, 2); + uint64_t value4 = __lasx_xvpickve2gr_du(answer, 3); + uint64_t *pos = reinterpret_cast(reinterpret_cast(output) + 16 - (pop1 + pop2) / 2); + pos[0] = value3; + pos[1] = value4; } template @@ -78633,72 +112276,84 @@ namespace simd { template<> struct simd8 : base8_numeric { simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) : simd8(_mm_setr_epi8( + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8({ v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + }) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Order-sensitive comparisons - simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } - simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } - simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + simdjson_inline simd8 max_val(const simd8 other) const { return __lasx_xvmax_b(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lasx_xvmin_b(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return __lasx_xvslt_b(other, *this); } + simdjson_inline simd8 operator<(const simd8 other) const { return __lasx_xvslt_b(*this, other); } }; // Unsigned bytes template<> struct simd8: base8_numeric { simdjson_inline simd8() : base8_numeric() {} - simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} // Splat constructor simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} // Member-by-member initialization simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) : simd8(_mm_setr_epi8( + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(__m256i(v32u8{ v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + })) {} // Repeat 16 values as many times as necessary (usually for lookup tables) simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10,v11,v12,v13,v14,v15 ); } // Saturated math - simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } - simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + simdjson_inline simd8 saturating_add(const simd8 other) const { return __lasx_xvsadd_bu(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return __lasx_xvssub_bu(*this, other); } // Order-specific operations - simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } - simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + simdjson_inline simd8 max_val(const simd8 other) const { return __lasx_xvmax_bu(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return __lasx_xvmin_bu(other, *this); } // Same as >, but only guarantees true is nonzero (< guarantees true = -1) simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) @@ -78706,84 +112361,91 @@ namespace simd { simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } // Bit-specific operations simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } - simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool is_ascii() const { + __m256i mask = __lasx_xvmskltz_b(*this); + return (0 == __lasx_xvpickve2gr_w(mask, 0)) && (0 == __lasx_xvpickve2gr_w(mask, 4)); + } + simdjson_inline bool bits_not_set_anywhere() const { + __m256i v = __lasx_xvmsknz_b(*this); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + __m256i v = __lasx_xvmsknz_b(__lasx_xvand_v(*this, bits)); + return (0 == __lasx_xvpickve2gr_w(v, 0)) && (0 == __lasx_xvpickve2gr_w(v, 4)); + } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } template - simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } - template - simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } - // Get one of the bits and make a bitmask out of it. - // e.g. value.get_bit<7>() gets the high bit + simdjson_inline simd8 shr() const { return simd8(__lasx_xvsrli_b(*this, N)); } template - simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + simdjson_inline simd8 shl() const { return simd8(__lasx_xvslli_b(*this, N)); } }; template struct simd8x64 { static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + static_assert(NUM_CHUNKS == 2, "LASX kernel should use two registers per 64-byte block."); const simd8 chunks[NUM_CHUNKS]; simd8x64(const simd8x64& o) = delete; // no copy allowed simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed - simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + __m256i zcnt = __lasx_xvpcnt_w(__m256i(v4u64{~mask, 0, 0, 0})); + uint64_t zcnt1 = __lasx_xvpickve2gr_wu(zcnt, 0); + uint64_t zcnt2 = __lasx_xvpickve2gr_wu(zcnt, 1); + // There should be a critical value which processes in scaler is faster. + if (zcnt1) + this->chunks[0].compress(mask1, output); + if (zcnt2) + this->chunks[1].compress(mask2, output + zcnt1); + return zcnt1 + zcnt2; + } simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr+sizeof(simd8)*0); this->chunks[1].store(ptr+sizeof(simd8)*1); - this->chunks[2].store(ptr+sizeof(simd8)*2); - this->chunks[3].store(ptr+sizeof(simd8)*3); - } - - simdjson_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); } - simdjson_inline uint64_t compress(uint64_t mask, T * output) const { - this->chunks[0].compress(uint16_t(mask), output); - this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); - this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); - this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); - return 64 - count_ones(mask); + simdjson_inline uint64_t to_bitmask() const { + __m256i mask0 = __lasx_xvmskltz_b(this->chunks[0]); + __m256i mask1 = __lasx_xvmskltz_b(this->chunks[1]); + __m256i mask_tmp = __lasx_xvpickve_w(mask0, 4); + __m256i tmp = __lasx_xvpickve_w(mask1, 4); + mask0 = __lasx_xvinsve0_w(mask0, mask1, 1); + mask_tmp = __lasx_xvinsve0_w(mask_tmp, tmp, 1); + return __lasx_xvpickve2gr_du(__lasx_xvpackev_h(mask_tmp, mask0), 0); } - simdjson_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); - uint64_t r1 = this->chunks[1].to_bitmask() ; - uint64_t r2 = this->chunks[2].to_bitmask() ; - uint64_t r3 = this->chunks[3].to_bitmask() ; - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + simdjson_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; } simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] == mask, - this->chunks[1] == mask, - this->chunks[2] == mask, - this->chunks[3] == mask + this->chunks[1] == mask ).to_bitmask(); } simdjson_inline uint64_t eq(const simd8x64 &other) const { return simd8x64( this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1], - this->chunks[2] == other.chunks[2], - this->chunks[3] == other.chunks[3] + this->chunks[1] == other.chunks[1] ).to_bitmask(); } @@ -78791,23 +112453,31 @@ namespace simd { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] <= mask, - this->chunks[1] <= mask, - this->chunks[2] <= mask, - this->chunks[3] <= mask + this->chunks[1] <= mask ).to_bitmask(); } }; // struct simd8x64 } // namespace simd } // unnamed namespace -} // namespace westmere +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H -/* end file simdjson/westmere/simd.h */ +#endif // SIMDJSON_LASX_SIMD_H +/* end file simdjson/lasx/simd.h */ +/* including simdjson/lasx/stringparsing_defs.h: #include "simdjson/lasx/stringparsing_defs.h" */ +/* begin file simdjson/lasx/stringparsing_defs.h */ +#ifndef SIMDJSON_LASX_STRINGPARSING_DEFS_H +#define SIMDJSON_LASX_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace { using namespace simd; @@ -78831,33 +112501,32 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin // this can read up to 31 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); - simd8 v0(src); - simd8 v1(src + 16); - v0.store(dst); - v1.store(dst + 16); - uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + simd8 v(src); + v.store(dst); return { - uint32_t(bs_and_quote), // bs_bits - uint32_t(bs_and_quote >> 32) // quote_bits + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits }; } } // unnamed namespace -} // namespace westmere +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H -/* end file simdjson/westmere/stringparsing_defs.h */ -/* end file simdjson/westmere/begin.h */ -/* including simdjson/generic/ondemand/amalgamated.h for westmere: #include "simdjson/generic/ondemand/amalgamated.h" */ -/* begin file simdjson/generic/ondemand/amalgamated.h for westmere */ +#endif // SIMDJSON_LASX_STRINGPARSING_DEFS_H +/* end file simdjson/lasx/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/lasx/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for lasx: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for lasx */ #if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) #error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! #endif // Stuff other things depend on -/* including simdjson/generic/ondemand/base.h for westmere: #include "simdjson/generic/ondemand/base.h" */ -/* begin file simdjson/generic/ondemand/base.h for westmere */ +/* including simdjson/generic/ondemand/base.h for lasx: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -78866,7 +112535,7 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { /** * A fast, simple, DOM-like interface that parses JSON as you use it. * @@ -78877,8 +112546,8 @@ namespace ondemand { /** Represents the depth of a JSON value (number of nested arrays/objects). */ using depth_t = int32_t; -/** @copydoc simdjson::westmere::number_type */ -using number_type = simdjson::westmere::number_type; +/** @copydoc simdjson::lasx::number_type */ +using number_type = simdjson::lasx::number_type; /** @private Position in the JSON buffer indexes */ using token_position = const uint32_t *; @@ -78901,13 +112570,139 @@ class value; class value_iterator; } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H -/* end file simdjson/generic/ondemand/base.h for westmere */ -/* including simdjson/generic/ondemand/value_iterator.h for westmere: #include "simdjson/generic/ondemand/value_iterator.h" */ -/* begin file simdjson/generic/ondemand/value_iterator.h for westmere */ +/* end file simdjson/generic/ondemand/base.h for lasx */ +/* including simdjson/generic/ondemand/deserialize.h for lasx: #include "simdjson/generic/ondemand/deserialize.h" */ +/* begin file simdjson/generic/ondemand/deserialize.h for lasx */ +#if SIMDJSON_SUPPORTS_DESERIALIZATION + +#ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_ONDEMAND_DESERIALIZE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +namespace simdjson { + +namespace tag_invoke_fn_ns { +void tag_invoke(); + +struct tag_invoke_fn { + template + requires requires(Tag tag, Args &&...args) { + tag_invoke(std::forward(tag), std::forward(args)...); + } + constexpr auto operator()(Tag tag, Args &&...args) const + noexcept(noexcept(tag_invoke(std::forward(tag), + std::forward(args)...))) + -> decltype(tag_invoke(std::forward(tag), + std::forward(args)...)) { + return tag_invoke(std::forward(tag), std::forward(args)...); + } +}; +} // namespace tag_invoke_fn_ns + +inline namespace tag_invoke_ns { +inline constexpr tag_invoke_fn_ns::tag_invoke_fn tag_invoke = {}; +} // namespace tag_invoke_ns + +template +concept tag_invocable = requires(Tag tag, Args... args) { + tag_invoke(std::forward(tag), std::forward(args)...); +}; + +template +concept nothrow_tag_invocable = + tag_invocable && requires(Tag tag, Args... args) { + { + tag_invoke(std::forward(tag), std::forward(args)...) + } noexcept; + }; + +template +using tag_invoke_result = + std::invoke_result; + +template +using tag_invoke_result_t = + std::invoke_result_t; + +template using tag_t = std::decay_t; + + +struct deserialize_tag; + +/// These types are deserializable in a built-in way +template struct is_builtin_deserializable : std::false_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; + +template +concept is_builtin_deserializable_v = is_builtin_deserializable::value; + +template +concept custom_deserializable = tag_invocable; + +template +concept deserializable = custom_deserializable || is_builtin_deserializable_v; + +template +concept nothrow_custom_deserializable = nothrow_tag_invocable; + +// built-in types are noexcept and if an error happens, the value simply gets ignored and the error is returned. +template +concept nothrow_deserializable = nothrow_custom_deserializable || is_builtin_deserializable_v; + +/// Deserialize Tag +inline constexpr struct deserialize_tag { + using value_type = lasx::ondemand::value; + using document_type = lasx::ondemand::document; + using document_reference_type = lasx::ondemand::document_reference; + + // Customization Point for value + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for document + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for document reference + template + requires custom_deserializable + [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + +} deserialize{}; + +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_DESERIALIZE_H +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + +/* end file simdjson/generic/ondemand/deserialize.h for lasx */ +/* including simdjson/generic/ondemand/value_iterator.h for lasx: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -78917,7 +112712,7 @@ class value_iterator; /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { /** @@ -79035,7 +112830,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -79045,7 +112841,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -79167,7 +112964,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -79177,7 +112975,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -79285,6 +113084,7 @@ class value_iterator { simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; simdjson_inline const uint8_t *peek_start() const noexcept; simdjson_inline uint32_t peek_start_length() const noexcept; + simdjson_inline uint32_t peek_root_length() const noexcept; /** * The general idea of the advance_... methods and the peek_* methods @@ -79379,18 +113179,19 @@ class value_iterator { friend class object; friend class array; friend class value; + friend class field; }; // value_iterator } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public westmere::implementation_simdjson_result_base { +struct simdjson_result : public lasx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(westmere::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lasx::ondemand::value_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; @@ -79398,9 +113199,9 @@ struct simdjson_result : public westmere::im } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H -/* end file simdjson/generic/ondemand/value_iterator.h for westmere */ -/* including simdjson/generic/ondemand/value.h for westmere: #include "simdjson/generic/ondemand/value.h" */ -/* begin file simdjson/generic/ondemand/value.h for westmere */ +/* end file simdjson/generic/ondemand/value_iterator.h for lasx */ +/* including simdjson/generic/ondemand/value.h for lasx: #include "simdjson/generic/ondemand/value.h" */ +/* begin file simdjson/generic/ondemand/value.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -79408,12 +113209,15 @@ struct simdjson_result : public westmere::im /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include + namespace simdjson { -namespace westmere { -namespace ondemand { +namespace lasx { +namespace ondemand { /** * An ephemeral JSON value returned during iteration. It is only valid for as long as you do * not access more data in the JSON document. @@ -79438,15 +113242,21 @@ class value { * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ - template simdjson_inline simdjson_result get() noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); + template + simdjson_inline simdjson_result get() +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; } + /** * Get this value as the given type. * @@ -79456,7 +113266,38 @@ class value { * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ - template simdjson_inline error_code get(T &out) noexcept; + template + simdjson_inline error_code get(T &out) +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + #if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif + } /** * Cast this JSON value to an array. @@ -79532,6 +113373,17 @@ class value { * Important: a value should be consumed once. Calling get_string() twice on the same value * is an error. * + * In some instances, you may want to allow replacement of invalid Unicode sequences. + * You may do so by passing the allow_replacement parameter as true. In the following + * example, the string "431924697b\udff0L\u0001Y" is not valid Unicode. By passing true + * to get_string, we allow the replacement of the invalid Unicode sequences with the Unicode + * replacement character (U+FFFD). + * + * simdjson::ondemand::parser parser; + * auto json = R"({"deviceId":"431924697b\udff0L\u0001Y"})"_padded; + * simdjson::ondemand::document doc = parser.iterate(json); + * auto view = doc["deviceId"].get_string(true); + * * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. @@ -79597,6 +113449,17 @@ class value { simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.). + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() noexcept(false); /** * Cast this JSON value to an array. * @@ -79760,7 +113623,7 @@ class value { * that only one field is returned. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field as not there when they are not in order). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. @@ -79772,6 +113635,7 @@ class value { simdjson_inline simdjson_result operator[](std::string_view key) noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_result operator[](int) noexcept = delete; /** * Get the type of this JSON value. It does not validate or consume the value. @@ -79796,6 +113660,13 @@ class value { * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the value is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; /** * Checks whether the value is a negative number. @@ -79828,10 +113699,12 @@ class value { * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have - * an integer greater or equal to 9223372036854775808 + * an integer greater or equal to 9223372036854775808. * get_number_type() is number_type::signed_integer if we have an - * integer that is less than 9223372036854775808 - * Otherwise, get_number_type() has value number_type::floating_point_number + * integer that is less than 9223372036854775808. + * get_number_type() is number_type::big_integer for integers that do not fit in 64 bits, + * in which case the digit_count is set to the length of the big integer string. + * Otherwise, get_number_type() has value number_type::floating_point_number. * * This function requires processing the number string, but it is expected * to be faster than get_number().get_number_type() because it is does not @@ -79858,6 +113731,8 @@ class value { * You can recover the value by calling number.get_uint64() and you * have that number.is_uint64() is true. * + * For integers that do not fit in 64 bits, the function returns BIGINT_ERROR error code. + * * Otherwise, number.get_number_type() has value number_type::floating_point_number * and we have a binary64 number. * You can recover the value by calling number.get_double() and you @@ -79873,7 +113748,6 @@ class value { */ simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; - /** * Get the raw JSON for this token. * @@ -79971,6 +113845,20 @@ class value { */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + + protected: /** * Create a value. @@ -80010,23 +113898,24 @@ class value { friend class object; friend struct simdjson_result; friend struct simdjson_result; + friend class field; }; } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public westmere::implementation_simdjson_result_base { +struct simdjson_result : public lasx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(westmere::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lasx::ondemand::value &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; - simdjson_inline simdjson_result get_array() noexcept; - simdjson_inline simdjson_result get_object() noexcept; + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; @@ -80038,7 +113927,7 @@ struct simdjson_result : public westmere::implementat template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; - simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; simdjson_inline simdjson_result is_null() noexcept; @@ -80047,20 +113936,22 @@ struct simdjson_result : public westmere::implementat template simdjson_inline error_code get(T &out) noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_inline operator westmere::ondemand::array() noexcept(false); - simdjson_inline operator westmere::ondemand::object() noexcept(false); + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator lasx::ondemand::array() noexcept(false); + simdjson_inline operator lasx::ondemand::object() noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); - simdjson_inline operator westmere::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator lasx::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; - simdjson_inline simdjson_result at(size_t index) noexcept; - simdjson_inline simdjson_result begin() & noexcept; - simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). @@ -80082,9 +113973,9 @@ struct simdjson_result : public westmere::implementat * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ - simdjson_inline simdjson_result find_field(std::string_view key) noexcept; - /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ - simdjson_inline simdjson_result find_field(const char *key) noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. @@ -80100,18 +113991,19 @@ struct simdjson_result : public westmere::implementat * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field as not there when they are not in order). * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ - simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; - /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; - /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_inline simdjson_result operator[](std::string_view key) noexcept; - /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + simdjson_result operator[](int) noexcept = delete; /** * Get the type of this JSON value. @@ -80120,12 +114012,13 @@ struct simdjson_result : public westmere::implementat * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). */ - simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; - simdjson_inline simdjson_result get_number_type() noexcept; - simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; @@ -80135,15 +114028,16 @@ struct simdjson_result : public westmere::implementat simdjson_inline simdjson_result current_location() noexcept; /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ simdjson_inline simdjson_result current_depth() const noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H -/* end file simdjson/generic/ondemand/value.h for westmere */ -/* including simdjson/generic/ondemand/logger.h for westmere: #include "simdjson/generic/ondemand/logger.h" */ -/* begin file simdjson/generic/ondemand/logger.h for westmere */ +/* end file simdjson/generic/ondemand/value.h for lasx */ +/* including simdjson/generic/ondemand/logger.h for lasx: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -80152,7 +114046,7 @@ struct simdjson_result : public westmere::implementat /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { // Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical @@ -80198,13 +114092,13 @@ static inline void log_error(const value_iterator &iter, const char *error, cons } // namespace logger } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H -/* end file simdjson/generic/ondemand/logger.h for westmere */ -/* including simdjson/generic/ondemand/token_iterator.h for westmere: #include "simdjson/generic/ondemand/token_iterator.h" */ -/* begin file simdjson/generic/ondemand/token_iterator.h for westmere */ +/* end file simdjson/generic/ondemand/logger.h for lasx */ +/* including simdjson/generic/ondemand/token_iterator.h for lasx: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -80215,7 +114109,7 @@ static inline void log_error(const value_iterator &iter, const char *error, cons /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { /** @@ -80254,7 +114148,7 @@ class token_iterator { * 1 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** @@ -80284,7 +114178,14 @@ class token_iterator { * @param position The position of the token. */ simdjson_inline uint32_t peek_length(token_position position) const noexcept; - + /** + * Get the maximum length of the JSON text for a root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token (start of the document). + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; /** * Return the current index. */ @@ -80339,15 +114240,15 @@ class token_iterator { }; } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public westmere::implementation_simdjson_result_base { +struct simdjson_result : public lasx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(westmere::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lasx::ondemand::token_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private @@ -80356,9 +114257,9 @@ struct simdjson_result : public westmere::im } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H -/* end file simdjson/generic/ondemand/token_iterator.h for westmere */ -/* including simdjson/generic/ondemand/json_iterator.h for westmere: #include "simdjson/generic/ondemand/json_iterator.h" */ -/* begin file simdjson/generic/ondemand/json_iterator.h for westmere */ +/* end file simdjson/generic/ondemand/token_iterator.h for lasx */ +/* including simdjson/generic/ondemand/json_iterator.h for lasx: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -80369,7 +114270,7 @@ struct simdjson_result : public westmere::im /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { /** @@ -80507,7 +114408,7 @@ class json_iterator { * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used ... */ simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** @@ -80535,7 +114436,7 @@ class json_iterator { * @param position The position of the token to retrieve. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used ... */ simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** @@ -80546,13 +114447,21 @@ class json_iterator { * @param position The position of the token to retrieve. */ simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current root token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_root_length(token_position position) const noexcept; /** * Get the JSON text for the last token in the document. * * This is not null-terminated; it is a view into the JSON. * * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... + * it is not used ... */ simdjson_inline const uint8_t *peek_last() const noexcept; @@ -80646,6 +114555,9 @@ class json_iterator { inline bool balanced() const noexcept; protected: simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser, bool streaming) noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /// The last token before the end simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. @@ -80661,6 +114573,7 @@ class json_iterator { friend class raw_json_string; friend class parser; friend class value_iterator; + friend class field; template friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; template @@ -80668,15 +114581,15 @@ class json_iterator { }; // json_iterator } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public westmere::implementation_simdjson_result_base { +struct simdjson_result : public lasx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(westmere::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lasx::ondemand::json_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -80685,9 +114598,9 @@ struct simdjson_result : public westmere::imp } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H -/* end file simdjson/generic/ondemand/json_iterator.h for westmere */ -/* including simdjson/generic/ondemand/json_type.h for westmere: #include "simdjson/generic/ondemand/json_type.h" */ -/* begin file simdjson/generic/ondemand/json_type.h for westmere */ +/* end file simdjson/generic/ondemand/json_iterator.h for lasx */ +/* including simdjson/generic/ondemand/json_type.h for lasx: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -80698,7 +114611,7 @@ struct simdjson_result : public westmere::imp /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { /** @@ -80831,15 +114744,15 @@ inline std::ostream& operator<<(std::ostream& out, simdjson_result &t #endif } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public westmere::implementation_simdjson_result_base { +struct simdjson_result : public lasx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(westmere::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lasx::ondemand::json_type &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private @@ -80848,9 +114761,9 @@ struct simdjson_result : public westmere::impleme } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H -/* end file simdjson/generic/ondemand/json_type.h for westmere */ -/* including simdjson/generic/ondemand/raw_json_string.h for westmere: #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* begin file simdjson/generic/ondemand/raw_json_string.h for westmere */ +/* end file simdjson/generic/ondemand/json_type.h for lasx */ +/* including simdjson/generic/ondemand/raw_json_string.h for lasx: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -80860,7 +114773,7 @@ struct simdjson_result : public westmere::impleme /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { /** @@ -81036,30 +114949,30 @@ simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_js } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public westmere::implementation_simdjson_result_base { +struct simdjson_result : public lasx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(westmere::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lasx::ondemand::raw_json_string &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; - simdjson_inline simdjson_warn_unused simdjson_result unescape(westmere::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; - simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(westmere::ondemand::json_iterator &iter) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(lasx::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(lasx::ondemand::json_iterator &iter) const noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H -/* end file simdjson/generic/ondemand/raw_json_string.h for westmere */ -/* including simdjson/generic/ondemand/parser.h for westmere: #include "simdjson/generic/ondemand/parser.h" */ -/* begin file simdjson/generic/ondemand/parser.h for westmere */ +/* end file simdjson/generic/ondemand/raw_json_string.h for lasx */ +/* including simdjson/generic/ondemand/parser.h for lasx: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -81071,12 +114984,12 @@ struct simdjson_result : public westmere::i #include namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -81146,6 +115059,22 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * + * ### std::string references + * + * If you pass a mutable std::string reference (std::string&), the parser will seek to extend + * its capacity to SIMDJSON_PADDING bytes beyond the end of the string. + * + * Whenever you pass an std::string reference, the parser will access the bytes beyond the end of + * the string but before the end of the allocated memory (std::string::capacity()). + * If you are using a sanitizer that checks for reading uninitialized bytes or std::string's + * container-overflow checks, you may encounter sanitizer warnings. + * You can safely ignore these warnings. Or you can call simdjson::pad(std::string&) to pad the + * string with SIMDJSON_PADDING spaces: this function returns a simdjson::padding_string_view + * which can be be passed to the parser's iterate function: + * + * std::string json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"; + * document doc = parser.iterate(simdjson::pad(json)); + * * @param json The JSON to parse. * @param len The length of the JSON. * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). @@ -81160,6 +115089,9 @@ class parser { * - UNCLOSED_STRING if there is an unclosed string in the document. */ simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + simdjson_warn_unused simdjson_result iterate_allow_incomplete_json(padded_string_view json) & noexcept; +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ @@ -81304,9 +115236,9 @@ class parser { simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). */ - simdjson_inline size_t capacity() const noexcept; + simdjson_pure simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ - simdjson_inline size_t max_capacity() const noexcept; + simdjson_pure simdjson_inline size_t max_capacity() const noexcept; simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). @@ -81314,7 +115246,7 @@ class parser { * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. */ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -81337,8 +115269,12 @@ class parser { * behavior of the parser for future operations. */ bool threaded{true}; + #else + /** + * When SIMDJSON_THREADS_ENABLED is not defined, the parser instance cannot use threads. + */ + bool threaded{false}; #endif - /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. * The result must be valid UTF-8. @@ -81386,9 +115322,20 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * the string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and usafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ - std::unique_ptr implementation{}; + std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; @@ -81402,15 +115349,15 @@ class parser { }; } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public westmere::implementation_simdjson_result_base { +struct simdjson_result : public lasx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(westmere::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lasx::ondemand::parser &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; @@ -81418,11 +115365,11 @@ struct simdjson_result : public westmere::implementa } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H -/* end file simdjson/generic/ondemand/parser.h for westmere */ +/* end file simdjson/generic/ondemand/parser.h for lasx */ // All other declarations -/* including simdjson/generic/ondemand/array.h for westmere: #include "simdjson/generic/ondemand/array.h" */ -/* begin file simdjson/generic/ondemand/array.h for westmere */ +/* including simdjson/generic/ondemand/array.h for lasx: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -81433,7 +115380,7 @@ struct simdjson_result : public westmere::implementa /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { /** @@ -81469,7 +115416,8 @@ class array { * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer - * safe to continue. + * safe to continue. Note that count_elements() does not validate the JSON values, + * only the structure of the array. * * To check that an array is empty, it is more performant to use * the is_empty() method. @@ -81525,6 +115473,22 @@ class array { * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. @@ -81596,25 +115560,26 @@ class array { }; } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public westmere::implementation_simdjson_result_base { +struct simdjson_result : public lasx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(westmere::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lasx::ondemand::array &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; - simdjson_inline simdjson_result begin() noexcept; - simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; inline simdjson_result count_elements() & noexcept; inline simdjson_result is_empty() & noexcept; inline simdjson_result reset() & noexcept; - simdjson_inline simdjson_result at(size_t index) noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; }; @@ -81622,9 +115587,9 @@ struct simdjson_result : public westmere::implementat } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H -/* end file simdjson/generic/ondemand/array.h for westmere */ -/* including simdjson/generic/ondemand/array_iterator.h for westmere: #include "simdjson/generic/ondemand/array_iterator.h" */ -/* begin file simdjson/generic/ondemand/array_iterator.h for westmere */ +/* end file simdjson/generic/ondemand/array.h for lasx */ +/* including simdjson/generic/ondemand/array_iterator.h for lasx: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -81636,7 +115601,7 @@ struct simdjson_result : public westmere::implementat namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { /** @@ -81696,15 +115661,15 @@ class array_iterator { }; } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public westmere::implementation_simdjson_result_base { +struct simdjson_result : public lasx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(westmere::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lasx::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -81712,28 +115677,31 @@ struct simdjson_result : public westmere::im // Iterator interface // - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. - simdjson_inline bool operator==(const simdjson_result &) const noexcept; - simdjson_inline bool operator!=(const simdjson_result &) const noexcept; - simdjson_inline simdjson_result &operator++() noexcept; + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H -/* end file simdjson/generic/ondemand/array_iterator.h for westmere */ -/* including simdjson/generic/ondemand/document.h for westmere: #include "simdjson/generic/ondemand/document.h" */ -/* begin file simdjson/generic/ondemand/document.h for westmere */ +/* end file simdjson/generic/ondemand/array_iterator.h for lasx */ +/* including simdjson/generic/ondemand/document.h for lasx: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { /** @@ -81904,22 +115872,39 @@ class document { * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ - template simdjson_inline simdjson_result get() & noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); + template + simdjson_inline simdjson_result get() & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "Cannot initialize the specified type."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; } - /** @overload template simdjson_result get() & noexcept */ - template simdjson_inline simdjson_result get() && noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template."); + /** + * @overload template simdjson_result get() & noexcept + * + * We disallow the use tag_invoke CPO on a moved document; it may create UB + * if user uses `ondemand::array` or `ondemand::object` in their custom type. + * + * The member function is still remains specialize-able for compatibility + * reasons, but we completely disallow its use when a tag_invoke customization + * is provided. + */ + template + simdjson_inline simdjson_result get() && +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(!std::is_same::value && !std::is_same::value, "You should never hold either an ondemand::array or ondemand::object without a corresponding ondemand::document being alive; that would be Undefined Behaviour."); + return static_cast(*this).get(); } /** @@ -81933,11 +115918,57 @@ class document { * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ - template simdjson_inline error_code get(T &out) & noexcept; + template + simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { +#if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + } + /** @overload template error_code get(T &out) & noexcept */ - template simdjson_inline error_code get(T &out) && noexcept; + template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an instance of type T. The programmer is responsible for + * providing an implementation of get for the type T, if T is not one of the types + * supported by the library (object, array, raw_json_string, string_view, uint64_t, etc.) + * + * See https://github.com/simdjson/simdjson/blob/master/doc/basics.md#adding-support-for-custom-types + * + * @returns An instance of type T + */ + template + explicit simdjson_inline operator T() & noexcept(false); + template + explicit simdjson_deprecated simdjson_inline operator T() && noexcept(false); + /** * Cast this JSON value to an array. * @@ -82021,7 +116052,8 @@ class document { * calling this function, if successful, the array is 'rewinded' at its * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer - * safe to continue. + * safe to continue. Note that count_elements() does not validate the JSON values, + * only the structure of the array. */ simdjson_inline simdjson_result count_elements() & noexcept; /** @@ -82110,7 +116142,7 @@ class document { * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field was not there when they are not in order). * * You must consume the fields on an object one at a time. A request for a new key * invalidates previous field values: it makes them unsafe. E.g., the array @@ -82133,6 +116165,7 @@ class document { simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; /** * Get the type of this JSON value. It does not validate or consume the value. @@ -82156,6 +116189,14 @@ class document { */ simdjson_inline simdjson_result is_scalar() noexcept; + /** + * Checks whether the document is a string. + * + * @returns true if the type is string + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_string() noexcept; + /** * Checks whether the document is a negative number. * @@ -82183,9 +116224,11 @@ class document { * get_number().get_number_type(). * * get_number_type() is number_type::unsigned_integer if we have - * an integer greater or equal to 9223372036854775808 + * an integer greater or equal to 9223372036854775808 and no larger than 18446744073709551615. * get_number_type() is number_type::signed_integer if we have an - * integer that is less than 9223372036854775808 + * integer that is less than 9223372036854775808 and greater or equal to -9223372036854775808. + * get_number_type() is number_type::big_integer if we have an integer outside + * of those ranges (either larger than 18446744073709551615 or smaller than -9223372036854775808). * Otherwise, get_number_type() has value number_type::floating_point_number * * This function requires processing the number string, but it is expected @@ -82303,6 +116346,14 @@ class document { * auto doc = parser.iterate(json); * doc.at_pointer("//a/1") == 20 * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/\\u00E9") == 123 + * doc.at_pointer((const char*)u8"/\u00E9") returns an error (NO_SUCH_FIELD) + * * Note that at_pointer() automatically calls rewind between each call. Thus * all values, objects and arrays that you have created so far (including unescaped strings) * are invalidated. After calling at_pointer, you need to consume the result: string values @@ -82319,6 +116370,30 @@ class document { * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). */ simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * + * Key values are matched exactly, without unescaping or Unicode normalization. + * We do a byte-by-byte comparison. E.g. + * + * const padded_string json = "{\"\\u00E9\":123}"_padded; + * auto doc = parser.iterate(json); + * doc.at_path(".\\u00E9") == 123 + * doc.at_path((const char*)u8".\u00E9") returns an error (NO_SUCH_FIELD) + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing @@ -82359,6 +116434,11 @@ class document { /** * A document_reference is a thin wrapper around a document reference instance. + * The document_reference instances are used primarily/solely for streams of JSON + * documents. They differ from document instances when parsing a scalar value + * (a document that is not an array or an object). In the case of a document, + * we expect the document to be fully consumed. In the case of a document_reference, + * we allow trailing content. */ class document_reference { public: @@ -82384,10 +116464,82 @@ class document_reference { simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; + template + simdjson_inline simdjson_result get() & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(std::is_default_constructible::value, "Cannot initialize the specified type."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + template + simdjson_inline simdjson_result get() && +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { + static_assert(!std::is_same::value && !std::is_same::value, "You should never hold either an ondemand::array or ondemand::object without a corresponding ondemand::document_reference being alive; that would be Undefined Behaviour."); + return static_cast(*this).get(); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_DESERIALIZATION + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) +#else + noexcept +#endif + { +#if SIMDJSON_SUPPORTS_DESERIALIZATION + if constexpr (custom_deserializable) { + return deserialize(*this, out); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_DESERIALIZATION + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION + } + + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; simdjson_inline operator document&() const noexcept; - #if SIMDJSON_EXCEPTIONS + template + explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator array() & noexcept(false); simdjson_inline operator object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); @@ -82407,11 +116559,13 @@ class document_reference { simdjson_inline simdjson_result find_field(const char *key) & noexcept; simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; @@ -82421,25 +116575,27 @@ class document_reference { simdjson_inline simdjson_result get_number() noexcept; simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + private: document *doc{nullptr}; }; } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public westmere::implementation_simdjson_result_base { +struct simdjson_result : public lasx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(westmere::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lasx::ondemand::document &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; - simdjson_inline simdjson_result get_array() & noexcept; - simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; @@ -82450,52 +116606,56 @@ struct simdjson_result : public westmere::implemen template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; - simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; - simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & noexcept; - template simdjson_inline simdjson_result get() && noexcept; + template simdjson_deprecated simdjson_inline simdjson_result get() && noexcept; template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; - #if SIMDJSON_EXCEPTIONS - simdjson_inline operator westmere::ondemand::array() & noexcept(false); - simdjson_inline operator westmere::ondemand::object() & noexcept(false); + template ::value == false>::type> + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator lasx::ondemand::array() & noexcept(false); + simdjson_inline operator lasx::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); - simdjson_inline operator westmere::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator lasx::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); - simdjson_inline operator westmere::ondemand::value() noexcept(false); + simdjson_inline operator lasx::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; - simdjson_inline simdjson_result at(size_t index) & noexcept; - simdjson_inline simdjson_result begin() & noexcept; - simdjson_inline simdjson_result end() & noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field(const char *key) & noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_inline simdjson_result operator[](const char *key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; - simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline int32_t current_depth() const noexcept; simdjson_inline bool at_end() const noexcept; simdjson_inline bool is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; - simdjson_inline simdjson_result get_number_type() noexcept; - simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; }; @@ -82506,14 +116666,14 @@ struct simdjson_result : public westmere::implemen namespace simdjson { template<> -struct simdjson_result : public westmere::implementation_simdjson_result_base { +struct simdjson_result : public lasx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(westmere::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result(lasx::ondemand::document_reference value, error_code error) noexcept; simdjson_inline simdjson_result() noexcept = default; simdjson_inline error_code rewind() noexcept; - simdjson_inline simdjson_result get_array() & noexcept; - simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; simdjson_inline simdjson_result get_uint64() noexcept; simdjson_inline simdjson_result get_uint64_in_string() noexcept; simdjson_inline simdjson_result get_int64() noexcept; @@ -82524,54 +116684,64 @@ struct simdjson_result : public westmere template simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; - simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; - simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result get_value() noexcept; simdjson_inline simdjson_result is_null() noexcept; + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_inline operator westmere::ondemand::array() & noexcept(false); - simdjson_inline operator westmere::ondemand::object() & noexcept(false); + template + explicit simdjson_inline operator T() noexcept(false); + simdjson_inline operator lasx::ondemand::array() & noexcept(false); + simdjson_inline operator lasx::ondemand::object() & noexcept(false); simdjson_inline operator uint64_t() noexcept(false); simdjson_inline operator int64_t() noexcept(false); simdjson_inline operator double() noexcept(false); simdjson_inline operator std::string_view() noexcept(false); - simdjson_inline operator westmere::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator lasx::ondemand::raw_json_string() noexcept(false); simdjson_inline operator bool() noexcept(false); - simdjson_inline operator westmere::ondemand::value() noexcept(false); + simdjson_inline operator lasx::ondemand::value() noexcept(false); #endif simdjson_inline simdjson_result count_elements() & noexcept; simdjson_inline simdjson_result count_fields() & noexcept; - simdjson_inline simdjson_result at(size_t index) & noexcept; - simdjson_inline simdjson_result begin() & noexcept; - simdjson_inline simdjson_result end() & noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field(const char *key) & noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_inline simdjson_result operator[](const char *key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; - simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_result operator[](int) & noexcept = delete; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_string() noexcept; simdjson_inline simdjson_result current_location() noexcept; simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result is_negative() noexcept; simdjson_inline simdjson_result is_integer() noexcept; - simdjson_inline simdjson_result get_number_type() noexcept; - simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ simdjson_inline simdjson_result raw_json_token() noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H -/* end file simdjson/generic/ondemand/document.h for westmere */ -/* including simdjson/generic/ondemand/document_stream.h for westmere: #include "simdjson/generic/ondemand/document_stream.h" */ -/* begin file simdjson/generic/ondemand/document_stream.h for westmere */ +/* end file simdjson/generic/ondemand/document.h for lasx */ +/* including simdjson/generic/ondemand/document_stream.h for lasx: #include "simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -82589,7 +116759,7 @@ struct simdjson_result : public westmere #endif namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { #ifdef SIMDJSON_THREADS_ENABLED @@ -82696,10 +116866,9 @@ class document_stream { class iterator { public: using value_type = simdjson_result; - using reference = value_type; - + using reference = simdjson_result; + using pointer = void; using difference_type = std::ptrdiff_t; - using iterator_category = std::input_iterator_tag; /** @@ -82709,7 +116878,7 @@ class document_stream { /** * Get the current document (or error). */ - simdjson_inline simdjson_result operator*() noexcept; + simdjson_inline reference operator*() noexcept; /** * Advance to the next document (prefix). */ @@ -82891,18 +117060,18 @@ class document_stream { friend class document; friend class json_iterator; friend struct simdjson_result; - friend struct internal::simdjson_result_base; + friend struct simdjson::internal::simdjson_result_base; }; // document_stream } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public westmere::implementation_simdjson_result_base { +struct simdjson_result : public lasx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(westmere::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lasx::ondemand::document_stream &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; }; @@ -82910,9 +117079,9 @@ struct simdjson_result : public westmere::i } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H -/* end file simdjson/generic/ondemand/document_stream.h for westmere */ -/* including simdjson/generic/ondemand/field.h for westmere: #include "simdjson/generic/ondemand/field.h" */ -/* begin file simdjson/generic/ondemand/field.h for westmere */ +/* end file simdjson/generic/ondemand/document_stream.h for lasx */ +/* including simdjson/generic/ondemand/field.h for lasx: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -82924,7 +117093,7 @@ struct simdjson_result : public westmere::i /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { /** @@ -82951,12 +117120,39 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; + /** + * Get the unprocessed key as a string_view. This includes the quotes and may include + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). + */ + simdjson_inline std::string_view key_raw_json_token() const noexcept; + /** + * Get the key as a string_view. This does not include the quotes and + * the string is unprocessed key so it may contain escape characters + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. + */ + simdjson_inline std::string_view escaped_key() const noexcept; /** * Get the field value. */ @@ -82975,29 +117171,33 @@ class field : public std::pair { }; } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public westmere::implementation_simdjson_result_base { +struct simdjson_result : public lasx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(westmere::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lasx::ondemand::field &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; - simdjson_inline simdjson_result key() noexcept; - simdjson_inline simdjson_result value() noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result key_raw_json_token() noexcept; + simdjson_inline simdjson_result escaped_key() noexcept; + simdjson_inline simdjson_result value() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H -/* end file simdjson/generic/ondemand/field.h for westmere */ -/* including simdjson/generic/ondemand/object.h for westmere: #include "simdjson/generic/ondemand/object.h" */ -/* begin file simdjson/generic/ondemand/object.h for westmere */ +/* end file simdjson/generic/ondemand/field.h for lasx */ +/* including simdjson/generic/ondemand/object.h for lasx: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -83008,7 +117208,7 @@ struct simdjson_result : public westmere::implementat /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { /** @@ -83056,6 +117256,8 @@ class object { * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() * is an error. * + * If you expect to have keys with escape characters, please review our documentation. + * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ @@ -83077,7 +117279,7 @@ class object { * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). + * field was not there when they are not in order). * * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful * that only one field is returned. @@ -83093,6 +117295,8 @@ class object { * You are expected to access keys only once. You should access the value corresponding to a key * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. * + * If you expect to have keys with escape characters, please review our documentation. + * * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ @@ -83140,11 +117344,24 @@ class object { */ inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Get the value associated with the given JSONPath expression. We only support + * JSONPath queries that trivially convertible to JSON Pointer queries: key + * names and array indices. + * + * @return The value associated with the given JSONPath expression, or: + * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + */ + inline simdjson_result at_path(std::string_view json_path) noexcept; + /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -83206,27 +117423,29 @@ class object { }; } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public westmere::implementation_simdjson_result_base { +struct simdjson_result : public lasx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(westmere::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lasx::ondemand::object &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; - simdjson_inline simdjson_result begin() noexcept; - simdjson_inline simdjson_result end() noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; - simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; @@ -83237,9 +117456,9 @@ struct simdjson_result : public westmere::implementa } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H -/* end file simdjson/generic/ondemand/object.h for westmere */ -/* including simdjson/generic/ondemand/object_iterator.h for westmere: #include "simdjson/generic/ondemand/object_iterator.h" */ -/* begin file simdjson/generic/ondemand/object_iterator.h for westmere */ +/* end file simdjson/generic/ondemand/object.h for lasx */ +/* including simdjson/generic/ondemand/object_iterator.h for lasx: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -83250,7 +117469,7 @@ struct simdjson_result : public westmere::implementa /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { class object_iterator { @@ -83291,15 +117510,15 @@ class object_iterator { }; } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { template<> -struct simdjson_result : public westmere::implementation_simdjson_result_base { +struct simdjson_result : public lasx::implementation_simdjson_result_base { public: - simdjson_inline simdjson_result(westmere::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(lasx::ondemand::object_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -83308,21 +117527,21 @@ struct simdjson_result : public westmere::i // // Reads key and value, yielding them to the user. - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. // Assumes it's being compared with the end. true if depth < iter->depth. - simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator==(const simdjson_result &) const noexcept; // Assumes it's being compared with the end. true if depth >= iter->depth. - simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; // Checks for ']' and ',' - simdjson_inline simdjson_result &operator++() noexcept; + simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H -/* end file simdjson/generic/ondemand/object_iterator.h for westmere */ -/* including simdjson/generic/ondemand/serialization.h for westmere: #include "simdjson/generic/ondemand/serialization.h" */ -/* begin file simdjson/generic/ondemand/serialization.h for westmere */ +/* end file simdjson/generic/ondemand/object_iterator.h for lasx */ +/* including simdjson/generic/ondemand/serialization.h for lasx: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -83336,30 +117555,30 @@ namespace simdjson { * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ -inline simdjson_result to_json_string(westmere::ondemand::document& x) noexcept; +inline simdjson_result to_json_string(lasx::ondemand::document& x) noexcept; /** * Create a string-view instance out of a value instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. The value must * not have been accessed previously. It does not * validate the content. */ -inline simdjson_result to_json_string(westmere::ondemand::value& x) noexcept; +inline simdjson_result to_json_string(lasx::ondemand::value& x) noexcept; /** * Create a string-view instance out of an object instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ -inline simdjson_result to_json_string(westmere::ondemand::object& x) noexcept; +inline simdjson_result to_json_string(lasx::ondemand::object& x) noexcept; /** * Create a string-view instance out of an array instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. It does not * validate the content. */ -inline simdjson_result to_json_string(westmere::ondemand::array& x) noexcept; -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(lasx::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); } // namespace simdjson /** @@ -83369,7 +117588,7 @@ inline simdjson_result to_json_string(simdjson_result x); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** - * Print JSON to an output stream. It does not - * validate the content. - * - * @param out The output stream. - * @param value The array. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +}}} // namespace simdjson::lasx::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H +/* end file simdjson/generic/ondemand/serialization.h for lasx */ + +// Deserialization for standard types +/* including simdjson/generic/ondemand/std_deserialize.h for lasx: #include "simdjson/generic/ondemand/std_deserialize.h" */ +/* begin file simdjson/generic/ondemand/std_deserialize.h for lasx */ +#if SIMDJSON_SUPPORTS_DESERIALIZATION + +#ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_ONDEMAND_DESERIALIZE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +template +constexpr bool require_custom_serialization = false; + +////////////////////////////// +// Number deserialization +////////////////////////////// + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + using limits = std::numeric_limits; + + uint64_t x; + SIMDJSON_TRY(val.get_uint64().get(x)); + if (x > (limits::max)()) { + return NUMBER_OUT_OF_RANGE; + } + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + double x; + SIMDJSON_TRY(val.get_double().get(x)); + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { + using limits = std::numeric_limits; + + int64_t x; + SIMDJSON_TRY(val.get_int64().get(x)); + if (x > (limits::max)() || x < (limits::min)()) { + return NUMBER_OUT_OF_RANGE; + } + out = static_cast(x); + return SUCCESS; +} + +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + +/** + * STL containers have several constructors including one that takes a single + * size argument. Thus, some compilers (Visual Studio) will not be able to + * disambiguate between the size and container constructor. Users should + * explicitly specify the type of the container as needed: e.g., + * doc.get>(). + */ +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::value_type; + static_assert( + deserializable, + "The specified type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the container must default constructible."); + + lasx::ondemand::array arr; + SIMDJSON_TRY(val.get_array().get(arr)); + for (auto v : arr) { + if constexpr (concepts::returns_reference) { + if (auto const err = v.get().get(concepts::emplace_one(out)); + err) { + // If an error occurs, the empty element that we just inserted gets + // removed. We're not using a temp variable because if T is a heavy + // type, we want the valid path to be the fast path and the slow path be + // the path that has errors in it. + if constexpr (requires { out.pop_back(); }) { + static_cast(out.pop_back()); + } + return err; + } + } else { + value_type temp; + if (auto const err = v.get().get(temp); err) { + return err; + } + concepts::emplace_one(out, std::move(temp)); + } + } + return SUCCESS; +} + + +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. */ -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::array value); -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); -#endif + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + lasx::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + + + + /** - * Print JSON to an output stream. It does not - * validate the content. + * This CPO (Customization Point Object) will help deserialize into + * smart pointers. * - * @param out The output stream. - * @param value The array. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + * If constructing T is nothrow, this conversion should be nothrow as well since + * we return MEMALLOC if we're not able to allocate memory instead of throwing + * the error message. + * + * @tparam T The type inside the smart pointer + * @tparam ValT document/value type + * @param val document/value + * @param out a reference to the smart pointer + * @return status of the conversion */ -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document& value); -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); -#endif -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document_reference& value); -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); -#endif +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::element_type, ValT>) { + using element_type = typename std::remove_cvref_t::element_type; + + // For better error messages, don't use these as constraints on + // the tag_invoke CPO. + static_assert( + deserializable, + "The specified type inside the unique_ptr must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the unique_ptr must default constructible."); + + auto ptr = new (std::nothrow) element_type(); + if (ptr == nullptr) { + return MEMALLOC; + } + SIMDJSON_TRY(val.template get(*ptr)); + out.reset(ptr); + return SUCCESS; +} + /** - * Print JSON to an output stream. It does not - * validate the content. - * - * @param out The output stream. - * @param value The object. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + * This CPO (Customization Point Object) will help deserialize into optional types. */ -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::object value); -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); -#endif -}}} // namespace simdjson::westmere::ondemand +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { + using value_type = typename std::remove_cvref_t::value_type; + + static_assert( + deserializable, + "The specified type inside the unique_ptr must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified type inside the unique_ptr must default constructible."); + + if (!out) { + out.emplace(); + } + SIMDJSON_TRY(val.template get(out.value())); + return SUCCESS; +} -#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H -/* end file simdjson/generic/ondemand/serialization.h for westmere */ +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_DESERIALIZE_H +#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +/* end file simdjson/generic/ondemand/std_deserialize.h for lasx */ // Inline definitions -/* including simdjson/generic/ondemand/array-inl.h for westmere: #include "simdjson/generic/ondemand/array-inl.h" */ -/* begin file simdjson/generic/ondemand/array-inl.h for westmere */ +/* including simdjson/generic/ondemand/array-inl.h for lasx: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ /* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/jsonpathutil.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ @@ -83444,7 +117878,7 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result array::at_pointer(std::string_view json_pointer) n return child; } +inline simdjson_result array::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { return INVALID_JSON_POINTER; } + return at_pointer(json_pointer); +} + simdjson_inline simdjson_result array::at(size_t index) noexcept { size_t i = 0; for (auto value : *this) { @@ -83606,60 +118046,64 @@ simdjson_inline simdjson_result array::at(size_t index) noexcept { } } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - westmere::ondemand::array &&value +simdjson_inline simdjson_result::simdjson_result( + lasx::ondemand::array &&value ) noexcept - : implementation_simdjson_result_base( - std::forward(value) + : implementation_simdjson_result_base( + std::forward(value) ) { } -simdjson_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept - : implementation_simdjson_result_base(error) + : implementation_simdjson_result_base(error) { } -simdjson_inline simdjson_result simdjson_result::begin() noexcept { +simdjson_inline simdjson_result simdjson_result::begin() noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() noexcept { +simdjson_inline simdjson_result simdjson_result::end() noexcept { if (error()) { return error(); } return first.end(); } -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { if (error()) { return error(); } return first.is_empty(); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); } } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H -/* end file simdjson/generic/ondemand/array-inl.h for westmere */ -/* including simdjson/generic/ondemand/array_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/array_iterator-inl.h for westmere */ +/* end file simdjson/generic/ondemand/array-inl.h for lasx */ +/* including simdjson/generic/ondemand/array_iterator-inl.h for lasx: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -83671,7 +118115,7 @@ simdjson_inline simdjson_result simdjson_result::simdjson_result( - westmere::ondemand::array_iterator &&value +simdjson_inline simdjson_result::simdjson_result( + lasx::ondemand::array_iterator &&value ) noexcept - : westmere::implementation_simdjson_result_base(std::forward(value)) + : lasx::implementation_simdjson_result_base(std::forward(value)) { first.iter.assert_is_valid(); } -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : westmere::implementation_simdjson_result_base({}, error) +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : lasx::implementation_simdjson_result_base({}, error) { } -simdjson_inline simdjson_result simdjson_result::operator*() noexcept { +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { if (error()) { return error(); } return *first; } -simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return !error(); } return first == other.first; } -simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return error(); } return first != other.first; } -simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { // Clear the error if there is one, so we don't yield it twice if (error()) { second = SUCCESS; return *this; } ++(first); @@ -83738,3738 +118182,4063 @@ simdjson_inline simdjson_result &simdjson_re } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/array_iterator-inl.h for westmere */ -/* including simdjson/generic/ondemand/document-inl.h for westmere: #include "simdjson/generic/ondemand/document-inl.h" */ -/* begin file simdjson/generic/ondemand/document-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for lasx */ +/* including simdjson/generic/ondemand/value-inl.h for lasx: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { -simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept - : iter{std::forward(_iter)} +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} { - logger::log_start_value(iter, "document"); -} - -simdjson_inline document document::start(json_iterator &&iter) noexcept { - return document(std::forward(iter)); } - -inline void document::rewind() noexcept { - iter.rewind(); -} - -inline std::string document::to_debug_string() noexcept { - return iter.to_string(); -} - -inline simdjson_result document::current_location() const noexcept { - return iter.current_location(); -} - -inline int32_t document::current_depth() const noexcept { - return iter.depth(); +simdjson_inline value value::start(const value_iterator &iter) noexcept { + return iter; } - -inline bool document::at_end() const noexcept { - return iter.at_end(); +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; } - -inline bool document::is_alive() noexcept { - return iter.is_alive(); -} -simdjson_inline value_iterator document::resume_value_iterator() noexcept { - return value_iterator(&iter, 1, iter.root_position()); +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); } -simdjson_inline value_iterator document::get_root_value_iterator() noexcept { - return resume_value_iterator(); +simdjson_inline simdjson_result value::get_object() noexcept { + return object::start(iter); } -simdjson_inline simdjson_result document::start_or_resume_object() noexcept { - if (iter.at_root()) { +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { return get_object(); } else { - return object::resume(resume_value_iterator()); - } -} -simdjson_inline simdjson_result document::get_value() noexcept { - // Make sure we start any arrays or objects before returning, so that start_root_() - // gets called. - - // It is the convention throughout the code that the macro `SIMDJSON_DEVELOPMENT_CHECKS` determines whether - // we check for OUT_OF_ORDER_ITERATION. Proper on::demand code should never trigger this error. -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!iter.at_root()) { return OUT_OF_ORDER_ITERATION; } -#endif - // assert_at_root() serves two purposes: in Debug mode, whether or not - // SIMDJSON_DEVELOPMENT_CHECKS is set or not, it checks that we are at the root of - // the document (this will typically be redundant). In release mode, it generates - // SIMDJSON_ASSUME statements to allow the compiler to make assumptions. - iter.assert_at_root(); - switch (*iter.peek()) { - case '[': { - // The following lines check that the document ends with ]. - auto value_iterator = get_root_value_iterator(); - auto error = value_iterator.check_root_array(); - if(error) { return error; } - return value(get_root_value_iterator()); - } - case '{': { - // The following lines would check that the document ends with }. - auto value_iterator = get_root_value_iterator(); - auto error = value_iterator.check_root_object(); - if(error) { return error; } - return value(get_root_value_iterator()); - } - default: - // Unfortunately, scalar documents are a special case in simdjson and they cannot - // be safely converted to value instances. - return SCALAR_DOCUMENT_AS_VALUE; + return object::resume(iter); } } -simdjson_inline simdjson_result document::get_array() & noexcept { - auto value = get_root_value_iterator(); - return array::start_root(value); -} -simdjson_inline simdjson_result document::get_object() & noexcept { - auto value = get_root_value_iterator(); - return object::start_root(value); -} - -/** - * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should - * give an error, so we check for trailing content. We want to disallow trailing - * content. - * Thus, in several implementations below, we pass a 'true' parameter value to - * a get_root_value_iterator() method: this indicates that we disallow trailing content. - */ -simdjson_inline simdjson_result document::get_uint64() noexcept { - return get_root_value_iterator().get_root_uint64(true); -} -simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { - return get_root_value_iterator().get_root_uint64_in_string(true); -} -simdjson_inline simdjson_result document::get_int64() noexcept { - return get_root_value_iterator().get_root_int64(true); -} -simdjson_inline simdjson_result document::get_int64_in_string() noexcept { - return get_root_value_iterator().get_root_int64_in_string(true); -} -simdjson_inline simdjson_result document::get_double() noexcept { - return get_root_value_iterator().get_root_double(true); -} -simdjson_inline simdjson_result document::get_double_in_string() noexcept { - return get_root_value_iterator().get_root_double_in_string(true); +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); } -simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { - return get_root_value_iterator().get_root_string(true, allow_replacement); +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); } template -simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { - return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); -} -simdjson_inline simdjson_result document::get_wobbly_string() noexcept { - return get_root_value_iterator().get_root_wobbly_string(true); -} -simdjson_inline simdjson_result document::get_raw_json_string() noexcept { - return get_root_value_iterator().get_root_raw_json_string(true); -} -simdjson_inline simdjson_result document::get_bool() noexcept { - return get_root_value_iterator().get_root_bool(true); -} -simdjson_inline simdjson_result document::is_null() noexcept { - return get_root_value_iterator().is_root_null(true); -} - -template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } -template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } -template<> simdjson_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } -template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } -template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } -template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } -template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } - -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } -template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } - -template simdjson_inline error_code document::get(T &out) & noexcept { - return get().get(out); -} -template simdjson_inline error_code document::get(T &out) && noexcept { - return std::forward(*this).get().get(out); -} - -#if SIMDJSON_EXCEPTIONS -simdjson_inline document::operator array() & noexcept(false) { return get_array(); } -simdjson_inline document::operator object() & noexcept(false) { return get_object(); } -simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } -simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } -simdjson_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } -simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } -simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } -simdjson_inline document::operator value() noexcept(false) { return get_value(); } - -#endif -simdjson_inline simdjson_result document::count_elements() & noexcept { - auto a = get_array(); - simdjson_result answer = a.count_elements(); - /* If there was an array, we are now left pointing at its first element. */ - if(answer.error() == SUCCESS) { rewind(); } - return answer; -} -simdjson_inline simdjson_result document::count_fields() & noexcept { - auto a = get_object(); - simdjson_result answer = a.count_fields(); - /* If there was an object, we are now left pointing at its first element. */ - if(answer.error() == SUCCESS) { rewind(); } - return answer; -} -simdjson_inline simdjson_result document::at(size_t index) & noexcept { - auto a = get_array(); - return a.at(index); -} -simdjson_inline simdjson_result document::begin() & noexcept { - return get_array().begin(); -} -simdjson_inline simdjson_result document::end() & noexcept { - return {}; -} - -simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { - return start_or_resume_object().find_field(key); -} -simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { - return start_or_resume_object().find_field(key); -} -simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { - return start_or_resume_object().find_field_unordered(key); -} -simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { - return start_or_resume_object().find_field_unordered(key); -} -simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { - return start_or_resume_object()[key]; -} -simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { - return start_or_resume_object()[key]; +simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { + return iter.get_string(receiver, allow_replacement); } - -simdjson_inline error_code document::consume() noexcept { - auto error = iter.skip_child(0); - if(error) { iter.abandon(); } - return error; +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); } - -simdjson_inline simdjson_result document::raw_json() noexcept { - auto _iter = get_root_value_iterator(); - const uint8_t * starting_point{_iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - // After 'consume()', we could be left pointing just beyond the document, but that - // is ok because we are not going to dereference the final pointer position, we just - // use it to compute the length in bytes. - const uint8_t * final_point{iter.unsafe_pointer()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); } - -simdjson_inline simdjson_result document::type() noexcept { - return get_root_value_iterator().type(); +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); } - -simdjson_inline simdjson_result document::is_scalar() noexcept { - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return ! ((this_type == json_type::array) || (this_type == json_type::object)); +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); } - -simdjson_inline bool document::is_negative() noexcept { - return get_root_value_iterator().is_root_negative(); +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); } - -simdjson_inline simdjson_result document::is_integer() noexcept { - return get_root_value_iterator().is_root_integer(true); +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); } - -simdjson_inline simdjson_result document::get_number_type() noexcept { - return get_root_value_iterator().get_root_number_type(true); +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); } - -simdjson_inline simdjson_result document::get_number() noexcept { - return get_root_value_iterator().get_root_number(true); +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); } - - -simdjson_inline simdjson_result document::raw_json_token() noexcept { - auto _iter = get_root_value_iterator(); - return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); } -simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { - rewind(); // Rewind the document each time at_pointer is called - if (json_pointer.empty()) { - return this->get_value(); - } - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: - return (*this).get_array().at_pointer(json_pointer); - case json_type::object: - return (*this).get_object().at_pointer(json_pointer); - default: - return INVALID_JSON_POINTER; - } -} +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } -} // namespace ondemand -} // namespace westmere -} // namespace simdjson -namespace simdjson { +template<> simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } +template<> simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } +template<> simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } +template<> simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } +template<> simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } +template<> simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } +template<> simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } +template<> simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } +template<> simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } -simdjson_inline simdjson_result::simdjson_result( - westmere::ondemand::document &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base( - error - ) -{ -} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { - if (error()) { return error(); } - return first.count_elements(); -} -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { - if (error()) { return error(); } - return first.at(index); -} -simdjson_inline error_code simdjson_result::rewind() noexcept { - if (error()) { return error(); } - first.rewind(); - return SUCCESS; -} -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_inline simdjson_result simdjson_result::end() & noexcept { - return {}; -} -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { - if (error()) { return error(); } - return first.get_array(); -} -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { - if (error()) { return error(); } - return first.get_object(); -} -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return error(); } - return first.get_uint64(); -} -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { - if (error()) { return error(); } - return first.get_uint64_in_string(); -} -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); -} -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { - if (error()) { return error(); } - return first.get_int64_in_string(); -} -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); -} -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { - if (error()) { return error(); } - return first.get_double_in_string(); +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline value::operator T() noexcept(false) { + return get(); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(allow_replacement); +simdjson_inline value::operator array() noexcept(false) { + return get_array(); } -template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(receiver, allow_replacement); +simdjson_inline value::operator object() noexcept(false) { + return get_object(); } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { - if (error()) { return error(); } - return first.get_wobbly_string(); +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); +simdjson_inline value::operator double() noexcept(false) { + return get_double(); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { - if (error()) { return error(); } - return first.get_value(); +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { - if (error()) { return error(); } - return first.is_null(); +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); } +#endif -template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { - if (error()) { return error(); } - return first.get(); +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); } -template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first).get(); +simdjson_inline simdjson_result value::end() & noexcept { + return {}; } -template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { - if (error()) { return error(); } - return first.get(out); +simdjson_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). + iter.move_at_start(); + return answer; } -template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { - if (error()) { return error(); } - return std::forward(first).get(out); +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); } -template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first); +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); } -template<> simdjson_inline error_code simdjson_result::get(westmere::ondemand::document &out) & noexcept = delete; -template<> simdjson_inline error_code simdjson_result::get(westmere::ondemand::document &out) && noexcept { - if (error()) { return error(); } - out = std::forward(first); - return SUCCESS; +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); } -simdjson_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); } - -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); } - -simdjson_inline bool simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); +simdjson_inline simdjson_result value::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); } -#if SIMDJSON_EXCEPTIONS -simdjson_inline simdjson_result::operator westmere::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator westmere::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); } -simdjson_inline simdjson_result::operator westmere::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); } -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); } -simdjson_inline simdjson_result::operator westmere::ondemand::value() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); } -#endif - -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); } -simdjson_inline bool simdjson_result::at_end() const noexcept { - if (error()) { return error(); } - return first.at_end(); +simdjson_inline simdjson_result value::raw_json() noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: { + ondemand::array array; + SIMDJSON_TRY(get_array().get(array)); + return array.raw_json(); + } + case json_type::object: { + ondemand::object object; + SIMDJSON_TRY(get_object().get(object)); + return object.raw_json(); + } + default: + return raw_json_token(); + } } - -simdjson_inline int32_t simdjson_result::current_depth() const noexcept { - if (error()) { return error(); } - return first.current_depth(); +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +inline bool is_pointer_well_formed(std::string_view json_pointer) noexcept { + if (simdjson_unlikely(json_pointer.empty())) { // can't be + return false; + } + if (simdjson_unlikely(json_pointer[0] != '/')) { + return false; + } + size_t escape = json_pointer.find('~'); + if (escape == std::string_view::npos) { + return true; + } + if (escape == json_pointer.size() - 1) { + return false; + } + if (json_pointer[escape + 1] != '0' && json_pointer[escape + 1] != '1') { + return false; + } + return true; } +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + // a non-empty string can be invalid, or accessing a primitive (issue 2154) + if (is_pointer_well_formed(json_pointer)) { + return NO_SUCH_FIELD; + } + return INVALID_JSON_POINTER; + } +} -} // namespace simdjson - - -namespace simdjson { -namespace westmere { -namespace ondemand { - -simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} -simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} -simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } -simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } -simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } -/** - * The document_reference instances are used primarily/solely for streams of JSON - * documents. - * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should - * give an error, so we check for trailing content. - * - * However, for streams of JSON documents, we want to be able to start from - * "321" "321" "321" - * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() - * successfully each time. - * - * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: - * this indicates that we allow trailing content. - */ -simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } -simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } -simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } -simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } -simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } -simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } -simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } -template -simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } -simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } -simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } -simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } -simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } -simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } - -#if SIMDJSON_EXCEPTIONS -simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } -simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } -simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } -simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } -simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } -simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } -simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } -simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } -simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } -#endif -simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } -simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } -simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } -simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } -simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } -simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } -simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } -simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } -simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } -simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } -simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } -simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } -simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } -simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } -simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } -simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } -simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } -simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } -simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } -simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } -simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } -simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} -simdjson_inline document_reference::operator document&() const noexcept { return *doc; } +simdjson_inline simdjson_result value::at_path(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson - - namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::document_reference value, error_code error) - noexcept : implementation_simdjson_result_base(std::forward(value), error) {} - -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { +simdjson_inline simdjson_result::simdjson_result( + lasx::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_inline error_code simdjson_result::rewind() noexcept { - if (error()) { return error(); } - first.rewind(); - return SUCCESS; -} -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } return {}; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { if (error()) { return error(); } - return first.find_field_unordered(key); + return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { if (error()) { return error(); } - return first.find_field_unordered(key); + return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { if (error()) { return error(); } - return first[key]; + return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { if (error()) { return error(); } - return first[key]; + return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { if (error()) { return error(); } - return first.find_field(key); + return first[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { if (error()) { return error(); } - return first.find_field(key); + return first[key]; } -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } - return first.get_value(); + return first.is_null(); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + +template<> simdjson_inline error_code simdjson_result::get(lasx::ondemand::value &out) noexcept { if (error()) { return error(); } - return first.is_null(); + out = first; + return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::type() noexcept { + +template simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } -simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS -simdjson_inline simdjson_result::operator westmere::ondemand::array() & noexcept(false) { +template +simdjson_inline simdjson_result::operator T() noexcept(false) { if (error()) { throw simdjson_error(error()); } - return first; + return first.get(); } -simdjson_inline simdjson_result::operator westmere::ondemand::object() & noexcept(false) { +simdjson_inline simdjson_result::operator lasx::ondemand::array() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { +simdjson_inline simdjson_result::operator lasx::ondemand::object() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator double() noexcept(false) { +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { +simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator westmere::ondemand::raw_json_string() noexcept(false) { +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator bool() noexcept(false) { +simdjson_inline simdjson_result::operator lasx::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_inline simdjson_result::operator westmere::ondemand::value() noexcept(false) { +simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); -} - -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); -} - -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } - return first.at_pointer(json_pointer); -} - - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H -/* end file simdjson/generic/ondemand/document-inl.h for westmere */ -/* including simdjson/generic/ondemand/document_stream-inl.h for westmere: #include "simdjson/generic/ondemand/document_stream-inl.h" */ -/* begin file simdjson/generic/ondemand/document_stream-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -#include -#include - -namespace simdjson { -namespace westmere { -namespace ondemand { - -#ifdef SIMDJSON_THREADS_ENABLED - -inline void stage1_worker::finish() { - // After calling "run" someone would call finish() to wait - // for the end of the processing. - // This function will wait until either the thread has done - // the processing or, else, the destructor has been called. - std::unique_lock lock(locking_mutex); - cond_var.wait(lock, [this]{return has_work == false;}); -} - -inline stage1_worker::~stage1_worker() { - // The thread may never outlive the stage1_worker instance - // and will always be stopped/joined before the stage1_worker - // instance is gone. - stop_thread(); -} - -inline void stage1_worker::start_thread() { - std::unique_lock lock(locking_mutex); - if(thread.joinable()) { - return; // This should never happen but we never want to create more than one thread. - } - thread = std::thread([this]{ - while(true) { - std::unique_lock thread_lock(locking_mutex); - // We wait for either "run" or "stop_thread" to be called. - cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); - // If, for some reason, the stop_thread() method was called (i.e., the - // destructor of stage1_worker is called, then we want to immediately destroy - // the thread (and not do any more processing). - if(!can_work) { - break; - } - this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, - this->_next_batch_start); - this->has_work = false; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify "finish" - thread_lock.unlock(); - } - } - ); -} - - -inline void stage1_worker::stop_thread() { - std::unique_lock lock(locking_mutex); - // We have to make sure that all locks can be released. - can_work = false; - has_work = false; - cond_var.notify_all(); - lock.unlock(); - if(thread.joinable()) { - thread.join(); - } -} - -inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { - std::unique_lock lock(locking_mutex); - owner = ds; - _next_batch_start = next_batch_start; - stage1_thread_parser = stage1; - has_work = true; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify the thread lock that we have work - lock.unlock(); + return first.raw_json_token(); } -#endif // SIMDJSON_THREADS_ENABLED - -simdjson_inline document_stream::document_stream( - ondemand::parser &_parser, - const uint8_t *_buf, - size_t _len, - size_t _batch_size, - bool _allow_comma_separated -) noexcept - : parser{&_parser}, - buf{_buf}, - len{_len}, - batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, - allow_comma_separated{_allow_comma_separated}, - error{SUCCESS} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change - #endif -{ -#ifdef SIMDJSON_THREADS_ENABLED - if(worker.get() == nullptr) { - error = MEMALLOC; - } -#endif +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); } -simdjson_inline document_stream::document_stream() noexcept - : parser{nullptr}, - buf{nullptr}, - len{0}, - batch_size{0}, - allow_comma_separated{false}, - error{UNINITIALIZED} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(false) - #endif -{ +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); } -simdjson_inline document_stream::~document_stream() noexcept -{ - #ifdef SIMDJSON_THREADS_ENABLED - worker.reset(); - #endif +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); } -inline size_t document_stream::size_in_bytes() const noexcept { - return len; +simdjson_inline simdjson_result simdjson_result::at_pointer( + std::string_view json_pointer) noexcept { + if (error()) { + return error(); + } + return first.at_pointer(json_pointer); } -inline size_t document_stream::truncated_bytes() const noexcept { - if(error == CAPACITY) { return len - batch_start; } - return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); } -simdjson_inline document_stream::iterator::iterator() noexcept - : stream{nullptr}, finished{true} { +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for lasx */ +/* including simdjson/generic/ondemand/document-inl.h for lasx: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); } -simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept - : stream{_stream}, finished{is_end} { +simdjson_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); } -simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { - //if(stream->error) { return stream->error; } - return simdjson_result(stream->doc, stream->error); +inline void document::rewind() noexcept { + iter.rewind(); } -simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { - // If there is an error, then we want the iterator - // to be finished, no matter what. (E.g., we do not - // keep generating documents with errors, or go beyond - // a document with errors.) - // - // Users do not have to call "operator*()" when they use operator++, - // so we need to end the stream in the operator++ function. - // - // Note that setting finished = true is essential otherwise - // we would enter an infinite loop. - if (stream->error) { finished = true; } - // Note that stream->error() is guarded against error conditions - // (it will immediately return if stream->error casts to false). - // In effect, this next function does nothing when (stream->error) - // is true (hence the risk of an infinite loop). - stream->next(); - // If that was the last document, we're finished. - // It is the only type of error we do not want to appear - // in operator*. - if (stream->error == EMPTY) { finished = true; } - // If we had any other kind of error (not EMPTY) then we want - // to pass it along to the operator* and we cannot mark the result - // as "finished" just yet. - return *this; +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); } -simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { - return finished != other.finished; +inline simdjson_result document::current_location() const noexcept { + return iter.current_location(); } -simdjson_inline document_stream::iterator document_stream::begin() noexcept { - start(); - // If there are no documents, we're finished. - return iterator(this, error == EMPTY); +inline int32_t document::current_depth() const noexcept { + return iter.depth(); } -simdjson_inline document_stream::iterator document_stream::end() noexcept { - return iterator(this, true); +inline bool document::at_end() const noexcept { + return iter.at_end(); } -inline void document_stream::start() noexcept { - if (error) { return; } - error = parser->allocate(batch_size); - if (error) { return; } - // Always run the first stage 1 parse immediately - batch_start = 0; - error = run_stage1(*parser, batch_start); - while(error == EMPTY) { - // In exceptional cases, we may start with an empty block - batch_start = next_batch_start(); - if (batch_start >= len) { return; } - error = run_stage1(*parser, batch_start); - } - if (error) { return; } - doc_index = batch_start; - doc = document(json_iterator(&buf[batch_start], parser)); - doc.iter._streaming = true; - #ifdef SIMDJSON_THREADS_ENABLED - if (use_thread && next_batch_start() < len) { - // Kick off the first thread on next batch if needed - error = stage1_thread_parser.allocate(batch_size); - if (error) { return; } - worker->start_thread(); - start_stage1_thread(); - if (error) { return; } +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); } - #endif // SIMDJSON_THREADS_ENABLED } +simdjson_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. -inline void document_stream::next() noexcept { - // We always enter at once once in an error condition. - if (error) { return; } - next_document(); - if (error) { return; } - auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); - doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; - - // Check if at end of structural indexes (i.e. at end of batch) - if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { - error = EMPTY; - // Load another batch (if available) - while (error == EMPTY) { - batch_start = next_batch_start(); - if (batch_start >= len) { break; } - #ifdef SIMDJSON_THREADS_ENABLED - if(use_thread) { - load_from_stage1_thread(); - } else { - error = run_stage1(*parser, batch_start); - } - #else - error = run_stage1(*parser, batch_start); - #endif - /** - * Whenever we move to another window, we need to update all pointers to make - * it appear as if the input buffer started at the beginning of the window. - * - * Take this input: - * - * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] - * - * Say you process the following window... - * - * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' - * - * When you do so, the json_iterator has a pointer at the beginning of the memory region - * (pointing at the beginning of '{"z"...'. - * - * When you move to the window that starts at... - * - * '[7, 10, 9] [15, 11, 12, 13] ... - * - * then it is not sufficient to just run stage 1. You also need to re-anchor the - * json_iterator so that it believes we are starting at '[7, 10, 9]...'. - * - * Under the DOM front-end, this gets done automatically because the parser owns - * the pointer the data, and when you call stage1 and then stage2 on the same - * parser, then stage2 will run on the pointer acquired by stage1. - * - * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that - * we used. But json_iterator has no callback when stage1 is called on the parser. - * In fact, I think that the parser is unaware of json_iterator. - * - * - * So we need to re-anchor the json_iterator after each call to stage 1 so that - * all of the pointers are in sync. - */ - doc.iter = json_iterator(&buf[batch_start], parser); - doc.iter._streaming = true; - /** - * End of resync. - */ - - if (error) { continue; } // If the error was EMPTY, we may want to load another batch. - doc_index = batch_start; + // It is the convention throughout the code that the macro `SIMDJSON_DEVELOPMENT_CHECKS` determines whether + // we check for OUT_OF_ORDER_ITERATION. Proper on::demand code should never trigger this error. +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.at_root()) { return OUT_OF_ORDER_ITERATION; } +#endif + // assert_at_root() serves two purposes: in Debug mode, whether or not + // SIMDJSON_DEVELOPMENT_CHECKS is set or not, it checks that we are at the root of + // the document (this will typically be redundant). In release mode, it generates + // SIMDJSON_ASSUME statements to allow the compiler to make assumptions. + iter.assert_at_root(); + switch (*iter.peek()) { + case '[': { + // The following lines check that the document ends with ]. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_array(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + case '{': { + // The following lines would check that the document ends with }. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_object(); + if(error) { return error; } + return value(get_root_value_iterator()); } + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; } } +simdjson_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} -inline void document_stream::next_document() noexcept { - // Go to next place where depth=0 (document depth) - error = doc.iter.skip_child(0); - if (error) { return; } - // Always set depth=1 at the start of document - doc.iter._depth = 1; - // consume comma if comma separated is allowed - if (allow_comma_separated) { doc.iter.consume_character(','); } - // Resets the string buffer at the beginning, thus invalidating the strings. - doc.iter._string_buf_loc = parser->string_buf.get(); - doc.iter._root = doc.iter.position(); +/** + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. We want to disallow trailing + * content. + * Thus, in several implementations below, we pass a 'true' parameter value to + * a get_root_value_iterator() method: this indicates that we disallow trailing content. + */ + +simdjson_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(true); +} +simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(true); +} +simdjson_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(true); +} +simdjson_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(true); +} +simdjson_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(true); +} +simdjson_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(true); +} +simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(true, allow_replacement); +} +template +simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); +} +simdjson_inline simdjson_result document::get_wobbly_string() noexcept { + return get_root_value_iterator().get_root_wobbly_string(true); +} +simdjson_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(true); +} +simdjson_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(true); +} +simdjson_inline simdjson_result document::is_null() noexcept { + return get_root_value_iterator().is_root_null(true); } -inline size_t document_stream::next_batch_start() const noexcept { - return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } +template<> simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } +template<> simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } +template<> simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } +template<> simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } +template<> simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } +template<> simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } +template<> simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } +template<> simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } + +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } + +#if SIMDJSON_EXCEPTIONS +template +simdjson_deprecated simdjson_inline document::operator T() && noexcept(false) { return get(); } +template +simdjson_inline document::operator T() & noexcept(false) { return get(); } +simdjson_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_inline document::operator object() & noexcept(false) { return get_object(); } +simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an object, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; } -inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { - // This code only updates the structural index in the parser, it does not update any json_iterator - // instance. - size_t remaining = len - _batch_start; - if (remaining <= batch_size) { - return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); - } else { - return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline error_code document::consume() noexcept { + bool scalar = false; + auto error = is_scalar().get(scalar); + if(error) { return error; } + if(scalar) { + iter.return_current_and_advance(); + return SUCCESS; } + error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; } -simdjson_inline size_t document_stream::iterator::current_index() const noexcept { - return stream->doc_index; +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } -simdjson_inline std::string_view document_stream::iterator::source() const noexcept { - auto depth = stream->doc.iter.depth(); - auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} - // If at root, process the first token to determine if scalar value - if (stream->doc.iter.at_root()) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': // Depth=1 already at start of document - break; - case '}': case ']': - depth--; - break; - default: // Scalar value document - // TODO: Remove any trailing whitespaces - // This returns a string spanning from start of value to the beginning of the next document (excluded) - return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); - } - cur_struct_index++; - } +simdjson_inline simdjson_result document::is_scalar() noexcept { + // For more speed, we could do: + // return iter.is_single_token(); + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} - while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': - depth++; - break; - case '}': case ']': - depth--; - break; - } - if (depth == 0) { break; } - cur_struct_index++; - } +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} - return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); } -inline error_code document_stream::iterator::error() const noexcept { - return stream->error; +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); } -#ifdef SIMDJSON_THREADS_ENABLED +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} -inline void document_stream::load_from_stage1_thread() noexcept { - worker->finish(); - // Swap to the parser that was loaded up in the thread. Make sure the parser has - // enough memory to swap to, as well. - std::swap(stage1_thread_parser,*parser); - error = stage1_thread_error; - if (error) { return; } +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} - // If there's anything left, start the stage 1 thread! - if (next_batch_start() < len) { - start_stage1_thread(); + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; } } -inline void document_stream::start_stage1_thread() noexcept { - // we call the thread on a lambda that will update - // this->stage1_thread_error - // there is only one thread that may write to this value - // TODO this is NOT exception-safe. - this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error - size_t _next_batch_start = this->next_batch_start(); - - worker->run(this, & this->stage1_thread_parser, _next_batch_start); +simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } } -#endif // SIMDJSON_THREADS_ENABLED - } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - error_code error +simdjson_inline simdjson_result::simdjson_result( + lasx::ondemand::document &&value ) noexcept : - implementation_simdjson_result_base(error) + implementation_simdjson_result_base( + std::forward(value) + ) { } -simdjson_inline simdjson_result::simdjson_result( - westmere::ondemand::document_stream &&value +simdjson_inline simdjson_result::simdjson_result( + error_code error ) noexcept : - implementation_simdjson_result_base( - std::forward(value) + implementation_simdjson_result_base( + error ) { } +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); } -#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H -/* end file simdjson/generic/ondemand/document_stream-inl.h for westmere */ -/* including simdjson/generic/ondemand/field-inl.h for westmere: #include "simdjson/generic/ondemand/field-inl.h" */ -/* begin file simdjson/generic/ondemand/field-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_inline error_code simdjson_result::get(lasx::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code simdjson_result::get(lasx::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} -namespace simdjson { -namespace westmere { -namespace ondemand { +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} -// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit -simdjson_inline field::field() noexcept : std::pair() {} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} -simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept - : std::pair(key, std::forward(value)) -{ +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); } -simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { - raw_json_string key; - SIMDJSON_TRY( parent_iter.field_key().get(key) ); - SIMDJSON_TRY( parent_iter.field_value() ); - return field::start(parent_iter, key); +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); } -simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { - return field(key, parent_iter.child()); +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); } -simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. - simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); - first.consume(); - return answer; +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); } -simdjson_inline raw_json_string field::key() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } return first; } - -simdjson_inline value &field::value() & noexcept { - return second; +simdjson_inline simdjson_result::operator lasx::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } - -simdjson_inline value field::value() && noexcept { - return std::forward(*this).second; +simdjson_inline simdjson_result::operator lasx::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lasx::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lasx::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } +#endif -} // namespace ondemand -} // namespace westmere -} // namespace simdjson -namespace simdjson { +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} -simdjson_inline simdjson_result::simdjson_result( - westmere::ondemand::field &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); } -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ + + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); } -simdjson_inline simdjson_result simdjson_result::key() noexcept { +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } - return first.key(); + return first.raw_json_token(); } -simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } - return first.unescaped_key(allow_replacement); + return first.at_pointer(json_pointer); } -simdjson_inline simdjson_result simdjson_result::value() noexcept { + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { if (error()) { return error(); } - return std::move(first.value()); + return first.at_path(json_path); } } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H -/* end file simdjson/generic/ondemand/field-inl.h for westmere */ -/* including simdjson/generic/ondemand/json_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/json_iterator-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { -simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept - : token(std::forward(other.token)), - parser{other.parser}, - _string_buf_loc{other._string_buf_loc}, - error{other.error}, - _depth{other._depth}, - _root{other._root}, - _streaming{other._streaming} -{ - other.parser = nullptr; +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. + * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +template +simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + + + +namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); } -simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { - token = other.token; - parser = other.parser; - _string_buf_loc = other._string_buf_loc; - error = other.error; - _depth = other._depth; - _root = other._root; - _streaming = other._streaming; - other.parser = nullptr; - return *this; +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +template <> +simdjson_inline error_code simdjson_result::get(lasx::ondemand::document_reference &out) & noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +template <> +simdjson_inline error_code simdjson_result::get(lasx::ondemand::document_reference &out) && noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator lasx::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lasx::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lasx::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lasx::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } +#endif -simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept - : token(buf, &_parser->implementation->structural_indexes[0]), - parser{_parser}, - _string_buf_loc{parser->string_buf.get()}, - _depth{1}, - _root{parser->implementation->structural_indexes.get()}, - _streaming{false} +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} -{ - logger::log_headers(); -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); } -inline void json_iterator::rewind() noexcept { - token.set_position( root_position() ); - logger::log_headers(); // We start again - _string_buf_loc = parser->string_buf.get(); - _depth = 1; +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -inline bool json_iterator::balanced() const noexcept { - token_iterator ti(token); - int32_t count{0}; - ti.set_position( root_position() ); - while(ti.peek() <= peek_last()) { - switch (*ti.return_current_and_advance()) - { - case '[': case '{': - count++; - break; - case ']': case '}': - count--; - break; - default: - break; - } +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); } - return count == 0; + return first.at_path(json_path); } +} // namespace simdjson -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and parent_depth, which is a desired effect. The warning does not show up if the -// skip_child() function is not marked inline). -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { - if (depth() <= parent_depth) { return SUCCESS; } - switch (*return_current_and_advance()) { - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for lasx */ +/* including simdjson/generic/ondemand/document_stream-inl.h for lasx: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H - // For the first open array/object in a value, we've already incremented depth, so keep it the same - // We never stop at colon, but if we did, it wouldn't affect depth - case '[': case '{': case ':': - logger::log_start_value(*this, "skip"); - break; - // If there is a comma, we have just finished a value in an array/object, and need to get back in - case ',': - logger::log_value(*this, "skip"); - break; - // ] or } means we just finished a value and need to jump out of the array/object - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } -#if SIMDJSON_CHECK_EOF - // If there are no more tokens, the parent is incomplete. - if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } -#endif // SIMDJSON_CHECK_EOF - break; - case '"': - if(*peek() == ':') { - // We are at a key!!! - // This might happen if you just started an object and you skip it immediately. - // Performance note: it would be nice to get rid of this check as it is somewhat - // expensive. - // https://github.com/simdjson/simdjson/issues/1742 - logger::log_value(*this, "key"); - return_current_and_advance(); // eat up the ':' - break; // important!!! - } - simdjson_fallthrough; - // Anything else must be a scalar value - default: - // For the first scalar, we will have incremented depth already, so we decrement it here. - logger::log_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // Now that we've considered the first value, we only increment/decrement for arrays/objects - while (position() < end_position()) { - switch (*return_current_and_advance()) { - case '[': case '{': - logger::log_start_value(*this, "skip"); - _depth++; - break; - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - default: - logger::log_value(*this, "skip", ""); - break; - } - } +#include +#include - return report_error(TAPE_ERROR, "not enough close braces"); -} +namespace simdjson { +namespace lasx { +namespace ondemand { -SIMDJSON_POP_DISABLE_WARNINGS +#ifdef SIMDJSON_THREADS_ENABLED -simdjson_inline bool json_iterator::at_root() const noexcept { - return position() == root_position(); +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); } -simdjson_inline bool json_iterator::is_single_token() const noexcept { - return parser->implementation->n_structural_indexes == 1; +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); } -simdjson_inline bool json_iterator::streaming() const noexcept { - return _streaming; +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); } -simdjson_inline token_position json_iterator::root_position() const noexcept { - return _root; -} -simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } } -simdjson_inline void json_iterator::assert_at_root() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument - // has side effects that will be discarded. - SIMDJSON_ASSUME( token.position() == _root ); -#endif +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); } -simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { - assert_valid_position(token._position + required_tokens - 1); -} +#endif // SIMDJSON_THREADS_ENABLED -simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); - SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } #endif } -simdjson_inline bool json_iterator::at_end() const noexcept { - return position() == end_position(); -} -simdjson_inline token_position json_iterator::end_position() const noexcept { - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - return &parser->implementation->structural_indexes[n_structural_indexes]; +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ } -inline std::string json_iterator::to_string() const noexcept { - if( !is_alive() ) { return "dead json_iterator instance"; } - const char * current_structural = reinterpret_cast(token.peek()); - return std::string("json_iterator [ depth : ") + std::to_string(_depth) - + std::string(", structural : '") + std::string(current_structural,1) - + std::string("', offset : ") + std::to_string(token.current_offset()) - + std::string("', error : ") + error_message(error) - + std::string(" ]"); +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif } -inline simdjson_result json_iterator::current_location() const noexcept { - if (!is_alive()) { // Unrecoverable error - if (!at_root()) { - return reinterpret_cast(token.peek(-1)); - } else { - return reinterpret_cast(token.peek()); - } - } - if (at_end()) { - return OUT_OF_BOUNDS; - } - return reinterpret_cast(token.peek()); +inline size_t document_stream::size_in_bytes() const noexcept { + return len; } -simdjson_inline bool json_iterator::is_alive() const noexcept { - return parser; +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; } -simdjson_inline void json_iterator::abandon() noexcept { - parser = nullptr; - _depth = 0; +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { } -simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif // SIMDJSON_CHECK_EOF - return token.return_current_and_advance(); +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { } -simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { - // deliberately done without safety guard: - return token.peek(); +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + return simdjson_result(stream->doc, stream->error); } -simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // SIMDJSON_CHECK_EOF - return token.peek(delta); +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; } -simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // #if SIMDJSON_CHECK_EOF - return token.peek_length(delta); +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; } -simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { - // todo: currently we require end-of-string buffering, but the following - // assert_valid_position should be turned on if/when we lift that condition. - // assert_valid_position(position); - // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF - // is ON by default, we have no choice but to disable it for real with a comment. - return token.peek(position); +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. + return iterator(this, error == EMPTY); } -simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_valid_position(position); -#endif // SIMDJSON_CHECK_EOF - return token.peek_length(position); +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); } -simdjson_inline token_position json_iterator::last_position() const noexcept { - // The following line fails under some compilers... - // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); - // since it has side-effects. - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - SIMDJSON_ASSUME(n_structural_indexes > 0); - return &parser->implementation->structural_indexes[n_structural_indexes - 1]; -} -simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { - return token.peek(last_position()); -} +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; -simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { - SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); - SIMDJSON_ASSUME(_depth == parent_depth + 1); - _depth = parent_depth; + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED } -simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); - _depth = child_depth; -} +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; -simdjson_inline depth_t json_iterator::depth() const noexcept { - return _depth; -} + // Check if at end of structural indexes (i.e. at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. + */ -simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { - return _string_buf_loc; + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } + } } -simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); - logger::log_error(*this, message); - error = _error; - return error; +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { doc.iter.consume_character(','); } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); } -simdjson_inline token_position json_iterator::position() const noexcept { - return token.position(); +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } -simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { - return parser->unescape(in, _string_buf_loc, allow_replacement); +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } } -simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { - return parser->unescape_wobbly(in, _string_buf_loc); +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; } -simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); -#if SIMDJSON_DEVELOPMENT_CHECKS -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); - SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); -#endif -#endif - token.set_position(position); - _depth = child_depth; -} +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); -simdjson_inline error_code json_iterator::consume_character(char c) noexcept { - if (*peek() == c) { - return_current_and_advance(); - return SUCCESS; + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: We could remove trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } + } + cur_struct_index++; } - return TAPE_ERROR; -} -#if SIMDJSON_DEVELOPMENT_CHECKS + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } -simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { - SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); - return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; } -simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { - SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); - if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; } -#endif +#ifdef SIMDJSON_THREADS_ENABLED +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } -simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); - logger::log_error(*this, message); - return _error; + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } } +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. + this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); -simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { - // This function is not expected to be called in performance-sensitive settings. - // Let us guard against silly cases: - if((N < max_len) || (N == 0)) { return false; } - // Copy to the buffer. - std::memcpy(tmpbuf, json, max_len); - if(N > max_len) { // We pad whatever remains with ' '. - std::memset(tmpbuf + max_len, ' ', N - max_len); - } - return true; + worker->run(this, & this->stage1_thread_parser, _next_batch_start); } +#endif // SIMDJSON_THREADS_ENABLED + } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::json_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result::simdjson_result( + lasx::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} -} // namespace simdjson +} -#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/json_iterator-inl.h for westmere */ -/* including simdjson/generic/ondemand/json_type-inl.h for westmere: #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* begin file simdjson/generic/ondemand/json_type-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for lasx */ +/* including simdjson/generic/ondemand/field-inl.h for lasx: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { -inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { - switch (type) { - case json_type::array: out << "array"; break; - case json_type::object: out << "object"; break; - case json_type::number: out << "number"; break; - case json_type::string: out << "string"; break; - case json_type::boolean: out << "boolean"; break; - case json_type::null: out << "null"; break; - default: SIMDJSON_UNREACHABLE(); - } - return out; -} +// clang 6 does not think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { - return out << type.value(); +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) +{ } -#endif - +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); +} -simdjson_inline number_type number::get_number_type() const noexcept { - return type; +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); } -simdjson_inline bool number::is_uint64() const noexcept { - return get_number_type() == number_type::unsigned_integer; +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; } -simdjson_inline uint64_t number::get_uint64() const noexcept { - return payload.unsigned_integer; +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; } -simdjson_inline number::operator uint64_t() const noexcept { - return get_uint64(); +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return first; } -simdjson_inline bool number::is_int64() const noexcept { - return get_number_type() == number_type::signed_integer; +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); } -simdjson_inline int64_t number::get_int64() const noexcept { - return payload.signed_integer; +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); } -simdjson_inline number::operator int64_t() const noexcept { - return get_int64(); +simdjson_inline value &field::value() & noexcept { + return second; } -simdjson_inline bool number::is_double() const noexcept { - return get_number_type() == number_type::floating_point_number; +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; } -simdjson_inline double number::get_double() const noexcept { - return payload.floating_point_number; -} +} // namespace ondemand +} // namespace lasx +} // namespace simdjson -simdjson_inline number::operator double() const noexcept { - return get_double(); -} +namespace simdjson { -simdjson_inline double number::as_double() const noexcept { - if(is_double()) { - return payload.floating_point_number; - } - if(is_int64()) { - return double(payload.signed_integer); - } - return double(payload.unsigned_integer); +simdjson_inline simdjson_result::simdjson_result( + lasx::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ } - -simdjson_inline void number::append_s64(int64_t value) noexcept { - payload.signed_integer = value; - type = number_type::signed_integer; +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ } -simdjson_inline void number::append_u64(uint64_t value) noexcept { - payload.unsigned_integer = value; - type = number_type::unsigned_integer; +simdjson_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); } -simdjson_inline void number::append_double(double value) noexcept { - payload.floating_point_number = value; - type = number_type::floating_point_number; +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { + if (error()) { return error(); } + return first.key_raw_json_token(); } -simdjson_inline void number::skip_double() noexcept { - type = number_type::floating_point_number; +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { + if (error()) { return error(); } + return first.escaped_key(); } -} // namespace ondemand -} // namespace westmere -} // namespace simdjson +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(allow_replacement); +} -namespace simdjson { +template +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} -simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::json_type &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); +} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H -/* end file simdjson/generic/ondemand/json_type-inl.h for westmere */ -/* including simdjson/generic/ondemand/logger-inl.h for westmere: #include "simdjson/generic/ondemand/logger-inl.h" */ -/* begin file simdjson/generic/ondemand/logger-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for lasx */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for lasx: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include -#include - namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { -namespace logger { - -static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; -static constexpr const int LOG_EVENT_LEN = 20; -static constexpr const int LOG_BUFFER_LEN = 30; -static constexpr const int LOG_SMALL_BUFFER_LEN = 10; -static int log_depth = 0; // Not threadsafe. Log only. - -// Helper to turn unprintable or newline characters into spaces -static inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } -} -template -static inline std::string string_format(const std::string& format, const Args&... args) +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} { - SIMDJSON_PUSH_DISABLE_ALL_WARNINGS - int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; - auto size = static_cast(size_s); - if (size <= 0) return std::string(); - std::unique_ptr buf(new char[size]); - std::snprintf(buf.get(), size, format.c_str(), args...); - SIMDJSON_POP_DISABLE_WARNINGS - return std::string(buf.get(), buf.get() + size - 1); + other.parser = nullptr; } - -static inline log_level get_log_level_from_env() -{ - SIMDJSON_PUSH_DISABLE_WARNINGS - SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe - char *lvl = getenv("SIMDJSON_LOG_LEVEL"); - SIMDJSON_POP_DISABLE_WARNINGS - if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } - return log_level::info; +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; } -static inline log_level log_threshold() -{ - static log_level threshold = get_log_level_from_env(); - return threshold; -} +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} -static inline bool should_log(log_level level) { - return level >= log_threshold(); -} - -inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta, log_level::info); -} - -inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "", type, detail, log_level::info); -} -inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta, log_level::info); -} - -inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "+", type, detail, log_level::info); - if (LOG_ENABLED) { log_depth++; } -} -inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_line(iter, "+", type, "", delta, depth_delta, log_level::info); - if (LOG_ENABLED) { log_depth++; } -} - -inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - if (LOG_ENABLED) { log_depth--; } - log_line(iter, "-", type, "", delta, depth_delta, log_level::info); + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif } -inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); -} -inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { - log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); -} +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} -inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_event(iter.json_iter(), type, detail, delta, depth_delta); +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif } +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_value(iter.json_iter(), type, detail, delta, depth_delta); +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; } -inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_start_value(iter.json_iter(), type, delta, depth_delta); +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; } -inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_end_value(iter.json_iter(), type, delta, depth_delta); -} -inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_error(iter.json_iter(), error, detail, delta, depth_delta); -} +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth -inline void log_headers() noexcept { - if (LOG_ENABLED) { - if (simdjson_unlikely(should_log(log_level::info))) { - // Technically a static variable is not thread-safe, but if you are using threads and logging... well... - static bool displayed_hint{false}; - log_depth = 0; - printf("\n"); - if (!displayed_hint) { - // We only print this helpful header once. - printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); - printf("# +array says 'this is where we were when we discovered the start array'\n"); - printf( - "# -array says 'this is where we were when we ended the array'\n"); - printf("# skip says 'this is a structural or value I am skipping'\n"); - printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); - printf("#\n"); - printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); - printf("# in addition to the depth being displayed.\n"); - printf("#\n"); - printf("# Every token in the document has a single depth determined by the tokens before it,\n"); - printf("# and is not affected by what the token actually is.\n"); - printf("#\n"); - printf("# Not all structural elements are presented as tokens in the logs.\n"); - printf("#\n"); - printf("# We never give control to the user within an empty array or an empty object.\n"); - printf("#\n"); - printf("# Inside an array, having a depth greater than the array's depth means that\n"); - printf("# we are pointing inside a value.\n"); - printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); - printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); - displayed_hint = true; + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! } - printf("\n"); - printf("| %-*s ", LOG_EVENT_LEN, "Event"); - printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); - printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); - // printf("| %-*s ", 5, "Next#"); - printf("| %-*s ", 5, "Depth"); - printf("| Detail "); - printf("|\n"); + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. + logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } - printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); - printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); - printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); - // printf("|%.*s", 5+2, DASHES); - printf("|%.*s", 5 + 2, DASHES); - printf("|--------"); - printf("|\n"); - fflush(stdout); + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; } } + + return report_error(TAPE_ERROR, "not enough close braces"); } -template -inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { - log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); } -template -inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { - if (LOG_ENABLED) { - if (simdjson_unlikely(should_log(level))) { - const int indent = depth * 2; - const auto buf = iter.token.buf; - auto msg = string_format(title, std::forward(args)...); - printf("| %*s%s%-*s ", indent, "", title_prefix, - LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); - { - // Print the current structural. - printf("| "); - // Before we begin, the index might point right before the document. - // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 - if (index < iter._root) { - printf("%*s", LOG_BUFFER_LEN, ""); - } else { - auto current_structural = &buf[*index]; - for (int i = 0; i < LOG_BUFFER_LEN; i++) { - printf("%c", printable_char(current_structural[i])); - } - } - printf(" "); - } - { - // Print the next structural. - printf("| "); - auto next_structural = &buf[*(index + 1)]; - for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { - printf("%c", printable_char(next_structural[i])); - } - printf(" "); - } - // printf("| %5u ", *(index+1)); - printf("| %5i ", depth); - printf("| %6.*s ", int(detail.size()), detail.data()); - printf("|\n"); - fflush(stdout); - } - } +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; } -} // namespace logger -} // namespace ondemand -} // namespace westmere -} // namespace simdjson +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; +} -#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H -/* end file simdjson/generic/ondemand/logger-inl.h for westmere */ -/* including simdjson/generic/ondemand/object-inl.h for westmere: #include "simdjson/generic/ondemand/object-inl.h" */ -/* begin file simdjson/generic/ondemand/object-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +} -namespace simdjson { -namespace westmere { -namespace ondemand { +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. + SIMDJSON_ASSUME( token.position() == _root ); +#endif +} -simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); } -simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); + +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#endif } -simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { - return find_field_unordered(key); + +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); } -simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { - return std::forward(*this).find_field_unordered(key); +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; } -simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); } -simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; + +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } } - return value(iter.child()); + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); } -simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_object().error() ); - return object(iter); +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; } -simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_root_object().error() ); - return object(iter); + +simdjson_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; } -simdjson_inline error_code object::consume() noexcept { - if(iter.is_at_key()) { - /** - * whenever you are pointing at a key, calling skip_child() is - * unsafe because you will hit a string and you will assume that - * it is string value, and this mistake will lead you to make bad - * depth computation. - */ - /** - * We want to 'consume' the key. We could really - * just do _json_iter->return_current_and_advance(); at this - * point, but, for clarity, we will use the high-level API to - * eat the key. We assume that the compiler optimizes away - * most of the work. - */ - simdjson_unused raw_json_string actual_key; - auto error = iter.field_key().get(actual_key); - if (error) { iter.abandon(); return error; }; - // Let us move to the value while we are at it. - if ((error = iter.field_value())) { iter.abandon(); return error; } - } - auto error_skip = iter.json_iter().skip_child(iter.depth()-1); - if(error_skip) { iter.abandon(); } - return error_skip; + +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); } -simdjson_inline simdjson_result object::raw_json() noexcept { - const uint8_t * starting_point{iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - const uint8_t * final_point{iter._json_iter->peek()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); } -simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.started_object().error() ); - return object(iter); +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); } -simdjson_inline object object::resume(const value_iterator &iter) noexcept { - return iter; +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); } -simdjson_inline object::object(const value_iterator &_iter) noexcept - : iter{_iter} -{ +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); } -simdjson_inline simdjson_result object::begin() noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - return object_iterator(iter); +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); } -simdjson_inline simdjson_result object::end() noexcept { - return object_iterator(iter); +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); } -inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { - if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } - json_pointer = json_pointer.substr(1); - size_t slash = json_pointer.find('/'); - std::string_view key = json_pointer.substr(0, slash); - // Grab the child with the given key - simdjson_result child; - - // If there is an escape character in the key, unescape it and then get the child. - size_t escape = key.find('~'); - if (escape != std::string_view::npos) { - // Unescape the key - std::string unescaped(key); - do { - switch (unescaped[escape+1]) { - case '0': - unescaped.replace(escape, 2, "~"); - break; - case '1': - unescaped.replace(escape, 2, "/"); - break; - default: - return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); - } - escape = unescaped.find('~', escape+1); - } while (escape != std::string::npos); - child = find_field(unescaped); // Take note find_field does not unescape keys when matching - } else { - child = find_field(key); - } - if(child.error()) { - return child; // we do not continue if there was an error - } - // If there is a /, we have to recurse and look up more of the path - if (slash != std::string_view::npos) { - child = child.at_pointer(json_pointer.substr(slash)); - } - return child; +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; } - -simdjson_inline simdjson_result object::count_fields() & noexcept { - size_t count{0}; - // Important: we do not consume any of the values. - for(simdjson_unused auto v : *this) { count++; } - // The above loop will always succeed, but we want to report errors. - if(iter.error()) { return iter.error(); } - // We need to move back at the start because we expect users to iterate through - // the object after counting the number of elements. - iter.reset_object(); - return count; +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); } -simdjson_inline simdjson_result object::is_empty() & noexcept { - bool is_not_empty; - auto error = iter.reset_object().get(is_not_empty); - if(error) { return error; } - return !is_not_empty; +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; } -simdjson_inline simdjson_result object::reset() & noexcept { - return iter.reset_object(); +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; } -} // namespace ondemand -} // namespace westmere -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::object &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -simdjson_inline simdjson_result simdjson_result::begin() noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_inline simdjson_result simdjson_result::end() noexcept { - if (error()) { return error(); } - return first.end(); +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); + +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field_unordered(key); + +simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; + +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first)[key]; + +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); + +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape_wobbly(in, _string_buf_loc); +#endif } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field(key); + +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; } -inline simdjson_result simdjson_result::reset() noexcept { - if (error()) { return error(); } - return first.reset(); +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; } -inline simdjson_result simdjson_result::is_empty() noexcept { - if (error()) { return error(); } - return first.is_empty(); +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); +#endif + + +simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; } -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { - if (error()) { return error(); } - return first.raw_json(); + +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. + std::memset(tmpbuf + max_len, ' ', N - max_len); + } + return true; } + +} // namespace ondemand +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H -/* end file simdjson/generic/ondemand/object-inl.h for westmere */ -/* including simdjson/generic/ondemand/object_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/object_iterator-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for lasx */ +/* including simdjson/generic/ondemand/json_type-inl.h for lasx: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { -// -// object_iterator -// +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); +} +#endif -simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept - : iter{_iter} -{} -simdjson_inline simdjson_result object_iterator::operator*() noexcept { - error_code error = iter.error(); - if (error) { iter.abandon(); return error; } - auto result = field::start(iter); - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (result.error()) { iter.abandon(); } - return result; + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; } -simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { - return !(*this != other); + +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; } -simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { - return iter.is_open(); + +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline object_iterator &object_iterator::operator++() noexcept { - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} - simdjson_unused error_code error; - if ((error = iter.skip_child() )) { return *this; } +simdjson_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} - simdjson_unused bool has_value; - if ((error = iter.has_next_field().get(has_value) )) { return *this; }; - return *this; +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; } -SIMDJSON_POP_DISABLE_WARNINGS -// -// ### Live States -// -// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is -// always SUCCESS: -// -// - Start: This is the state when the object is first found and the iterator is just past the {. -// In this state, at_start == true. -// - Next: After we hand a scalar value to the user, or an array/object which they then fully -// iterate over, the iterator is at the , or } before the next value. In this state, -// depth == iter.depth, at_start == false, and error == SUCCESS. -// - Unfinished Business: When we hand an array/object to the user which they do not fully -// iterate over, we need to finish that iteration by skipping child values until we reach the -// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. -// -// ## Error States -// -// In error states, we will yield exactly one more value before stopping. iter.depth == depth -// and at_start is always false. We decrement after yielding the error, moving to the Finished -// state. -// -// - Chained Error: When the object iterator is part of an error chain--for example, in -// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an -// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and -// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. -// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, -// we flag that as an error and treat it exactly the same as a Chained Error. In this state, -// error == TAPE_ERROR, iter.depth == depth, and at_start == false. -// -// Errors that occur while reading a field to give to the user (such as when the key is not a -// string or the field is missing a colon) are yielded immediately. Depth is then decremented, -// moving to the Finished state without transitioning through an Error state at all. -// -// ## Terminal State -// -// The terminal state has iter.depth < depth. at_start is always false. -// -// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. -// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. -// +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); +} -} // namespace ondemand -} // namespace westmere -} // namespace simdjson +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; +} -namespace simdjson { +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} -simdjson_inline simdjson_result::simdjson_result( - westmere::ondemand::object_iterator &&value -) noexcept - : implementation_simdjson_result_base(std::forward(value)) -{ - first.iter.assert_is_valid(); +simdjson_inline number::operator double() const noexcept { + return get_double(); } -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base({}, error) -{ + +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); } -simdjson_inline simdjson_result simdjson_result::operator*() noexcept { - if (error()) { return error(); } - return *first; +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; } -// If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return !error(); } - return first == other.first; + +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; } -// If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return error(); } - return first != other.first; + +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; } -// Checks for ']' and ',' -simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { - // Clear the error if there is one, so we don't yield it twice - if (error()) { second = SUCCESS; return *this; } - ++first; - return *this; + +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; } +} // namespace ondemand +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/object_iterator-inl.h for westmere */ -/* including simdjson/generic/ondemand/parser-inl.h for westmere: #include "simdjson/generic/ondemand/parser-inl.h" */ -/* begin file simdjson/generic/ondemand/parser-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for lasx */ +/* including simdjson/generic/ondemand/logger-inl.h for lasx: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include +#include + namespace simdjson { -namespace westmere { +namespace lasx { namespace ondemand { +namespace logger { -simdjson_inline parser::parser(size_t max_capacity) noexcept - : _max_capacity{max_capacity} { -} - -simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { - if (new_capacity > max_capacity()) { return CAPACITY; } - if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. - // string_capacity copied from document::allocate - _capacity = 0; - size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); - string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); -#if SIMDJSON_DEVELOPMENT_CHECKS - start_positions.reset(new (std::nothrow) token_position[new_max_depth]); -#endif - if (implementation) { - SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); - SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; } else { - SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + return ' '; } - _capacity = new_capacity; - _max_depth = new_max_depth; - return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } - - json.remove_utf8_bom(); - - // Allocate if needed - if (capacity() < json.length() || !string_buf) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); - } - - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return document::start({ reinterpret_cast(json.data()), this }); +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { - return iterate(padded_string_view(json, allocated)); +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { - if(json.capacity() - json.size() < SIMDJSON_PADDING) { - json.reserve(json.size() + SIMDJSON_PADDING); - } - return iterate(padded_string_view(json)); +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { - return iterate(padded_string_view(json)); +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); } - -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - padded_string_view json = result.value_unsafe(); - return iterate(json); +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - const padded_string &json = result.value_unsafe(); - return iterate(json); +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } - - json.remove_utf8_bom(); - - // Allocate if needed - if (capacity() < json.length()) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); - } - - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return json_iterator(reinterpret_cast(json.data()), this); +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); } -inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { - if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } - if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { - buf += 3; - len -= 3; - } - if(allow_comma_separated && batch_size < len) { batch_size = len; } - return document_stream(*this, buf, len, batch_size, allow_comma_separated); +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); } -inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); } -inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); + +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); } -inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); } -simdjson_inline size_t parser::capacity() const noexcept { - return _capacity; +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_inline size_t parser::max_capacity() const noexcept { - return _max_capacity; + +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_inline size_t parser::max_depth() const noexcept { - return _max_depth; + +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); } -simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { - if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { - _max_capacity = max_capacity; - } else { - _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; +inline void log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } } } -simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { - uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; - return result; +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); } -simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { - uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; - return result; +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. + printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } } +} // namespace logger } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::parser &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H -/* end file simdjson/generic/ondemand/parser-inl.h for westmere */ -/* including simdjson/generic/ondemand/raw_json_string-inl.h for westmere: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ -/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for lasx */ +/* including simdjson/generic/ondemand/object-inl.h for lasx: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { - -namespace westmere { +namespace lasx { namespace ondemand { -simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} - -simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } - - -simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { - size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;pos < target.size() && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;pos < target.size();pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; } - return true; + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); } -simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { - size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;target[pos] && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;target[pos];pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); +} +simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } } - return true; + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; } +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} -simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { - // If we are going to call memcmp, then we must know something about the length of the raw_json_string. - return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); } -simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and - // the raw content is quote terminated within a valid JSON string. - if(target.size() <= SIMDJSON_PADDING) { - return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); - } - const char * r{raw()}; - size_t pos{0}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } - } - if(r[pos] != '"') { return false; } - return true; +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; } -simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } - } - if(r[pos] != '"') { return false; } - return true; +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ } +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); +} -simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { - // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); } - if(r[pos] != '"') { return false; } - return true; + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; } -simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; } - if(r[pos] != '"') { return false; } - return true; + return at_pointer(json_pointer); } -simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { - return a.unsafe_is_equal(c); +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; } -simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { - return a == c; +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; } -simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { - return !(a == c); +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); } -simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { - return !(a == c); -} +} // namespace ondemand +} // namespace lasx +} // namespace simdjson +namespace simdjson { -simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { - return iter.unescape(*this, allow_replacement); +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); } -simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { - return iter.unescape_wobbly(*this); +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { - bool in_escape = false; - const char *s = str.raw(); - while (true) { - switch (*s) { - case '\\': in_escape = !in_escape; break; - case '"': if (in_escape) { in_escape = false; } else { return out; } break; - default: if (in_escape) { in_escape = false; } - } - out << *s; - s++; +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); } + return first.at_path(json_path); } -} // namespace ondemand -} // namespace westmere -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::raw_json_string &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); +} -simdjson_inline simdjson_result simdjson_result::raw() const noexcept { +inline simdjson_result simdjson_result::is_empty() noexcept { if (error()) { return error(); } - return first.raw(); + return first.is_empty(); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(westmere::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } - return first.unescape(iter, allow_replacement); + return first.count_fields(); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(westmere::ondemand::json_iterator &iter) const noexcept { + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } - return first.unescape_wobbly(iter); + return first.raw_json(); } } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H -/* end file simdjson/generic/ondemand/raw_json_string-inl.h for westmere */ -/* including simdjson/generic/ondemand/serialization-inl.h for westmere: #include "simdjson/generic/ondemand/serialization-inl.h" */ -/* begin file simdjson/generic/ondemand/serialization-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for lasx */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for lasx: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { +namespace lasx { +namespace ondemand { -inline std::string_view trim(const std::string_view str) noexcept { - // We can almost surely do better by rolling our own find_first_not_of function. - size_t first = str.find_first_not_of(" \t\n\r"); - // If we have the empty string (just white space), then no trimming is possible, and - // we return the empty string_view. - if (std::string_view::npos == first) { return std::string_view(); } - size_t last = str.find_last_not_of(" \t\n\r"); - return str.substr(first, (last - first + 1)); -} +// +// object_iterator +// +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} -inline simdjson_result to_json_string(westmere::ondemand::document& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; } - -inline simdjson_result to_json_string(westmere::ondemand::document_reference& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); } - -inline simdjson_result to_json_string(westmere::ondemand::value& x) noexcept { - /** - * If we somehow receive a value that has already been consumed, - * then the following code could be in trouble. E.g., we create - * an array as needed, but if an array was already created, then - * it could be bad. - */ - using namespace westmere::ondemand; - westmere::ondemand::json_type t; - auto error = x.type().get(t); - if(error != SUCCESS) { return error; } - switch (t) - { - case json_type::array: - { - westmere::ondemand::array array; - error = x.get_array().get(array); - if(error) { return error; } - return to_json_string(array); - } - case json_type::object: - { - westmere::ondemand::object object; - error = x.get_object().get(object); - if(error) { return error; } - return to_json_string(object); - } - default: - return trim(x.raw_json_token()); - } +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); } -inline simdjson_result to_json_string(westmere::ondemand::object& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); -} +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error -inline simdjson_result to_json_string(westmere::ondemand::array& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); -} + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; } +SIMDJSON_POP_DISABLE_WARNINGS -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); -} +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); -} +} // namespace ondemand +} // namespace lasx +} // namespace simdjson -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lasx::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; } + } // namespace simdjson -namespace simdjson { namespace westmere { namespace ondemand { +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for lasx */ +/* including simdjson/generic/ondemand/parser-inl.h for lasx: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +simdjson_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); } else { - throw simdjson::simdjson_error(error); + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); } #endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); } -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document_reference& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; } + return document::start({ reinterpret_cast(json.data()), this, true }); } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { + if(json.capacity() - json.size() < SIMDJSON_PADDING) { + json.reserve(json.size() + SIMDJSON_PADDING); } + return iterate(padded_string_view(json)); } -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); } -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); } -#endif -}}} // namespace simdjson::westmere::ondemand -#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H -/* end file simdjson/generic/ondemand/serialization-inl.h for westmere */ -/* including simdjson/generic/ondemand/token_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/token_iterator-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } -namespace simdjson { -namespace westmere { -namespace ondemand { + json.remove_utf8_bom(); -simdjson_inline token_iterator::token_iterator( - const uint8_t *_buf, - token_position position -) noexcept : buf{_buf}, _position{position} -{ -} + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } -simdjson_inline uint32_t token_iterator::current_offset() const noexcept { - return *(_position); + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); } - -simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { - return &buf[*(_position++)]; +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; + } + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); } - -simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { - return &buf[*position]; +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); } -simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { - return *position; +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } -simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { - return *(position+1) - *position; +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } -simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { - return &buf[*(_position+delta)]; +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; } -simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { - return *(_position+delta); +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; } -simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { - return *(_position+delta+1) - *(_position+delta); +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; } -simdjson_inline token_position token_iterator::position() const noexcept { - return _position; -} -simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { - _position = target_position; +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; + } } -simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { - return _position == other._position; -} -simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { - return _position != other._position; -} -simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { - return _position > other._position; -} -simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { - return _position >= other._position; -} -simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { - return _position < other._position; +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { - return _position <= other._position; + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::token_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/token_iterator-inl.h for westmere */ -/* including simdjson/generic/ondemand/value-inl.h for westmere: #include "simdjson/generic/ondemand/value-inl.h" */ -/* begin file simdjson/generic/ondemand/value-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for lasx */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for lasx: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { + +namespace lasx { namespace ondemand { -simdjson_inline value::value(const value_iterator &_iter) noexcept - : iter{_iter} -{ -} -simdjson_inline value value::start(const value_iterator &iter) noexcept { - return iter; -} -simdjson_inline value value::resume(const value_iterator &iter) noexcept { - return iter; -} +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} -simdjson_inline simdjson_result value::get_array() noexcept { - return array::start(iter); -} -simdjson_inline simdjson_result value::get_object() noexcept { - return object::start(iter); -} -simdjson_inline simdjson_result value::start_or_resume_object() noexcept { - if (iter.at_start()) { - return get_object(); - } else { - return object::resume(iter); - } -} +simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } -simdjson_inline simdjson_result value::get_raw_json_string() noexcept { - return iter.get_raw_json_string(); -} -simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { - return iter.get_string(allow_replacement); -} -template -simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { - return iter.get_string(receiver, allow_replacement); -} -simdjson_inline simdjson_result value::get_wobbly_string() noexcept { - return iter.get_wobbly_string(); -} -simdjson_inline simdjson_result value::get_double() noexcept { - return iter.get_double(); -} -simdjson_inline simdjson_result value::get_double_in_string() noexcept { - return iter.get_double_in_string(); -} -simdjson_inline simdjson_result value::get_uint64() noexcept { - return iter.get_uint64(); -} -simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { - return iter.get_uint64_in_string(); -} -simdjson_inline simdjson_result value::get_int64() noexcept { - return iter.get_int64(); -} -simdjson_inline simdjson_result value::get_int64_in_string() noexcept { - return iter.get_int64_in_string(); -} -simdjson_inline simdjson_result value::get_bool() noexcept { - return iter.get_bool(); -} -simdjson_inline simdjson_result value::is_null() noexcept { - return iter.is_null(); -} -template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } -template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } -template simdjson_inline error_code value::get(T &out) noexcept { - return get().get(out); +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; } -#if SIMDJSON_EXCEPTIONS -simdjson_inline value::operator array() noexcept(false) { - return get_array(); -} -simdjson_inline value::operator object() noexcept(false) { - return get_object(); -} -simdjson_inline value::operator uint64_t() noexcept(false) { - return get_uint64(); -} -simdjson_inline value::operator int64_t() noexcept(false) { - return get_int64(); -} -simdjson_inline value::operator double() noexcept(false) { - return get_double(); -} -simdjson_inline value::operator std::string_view() noexcept(false) { - return get_string(false); -} -simdjson_inline value::operator raw_json_string() noexcept(false) { - return get_raw_json_string(); -} -simdjson_inline value::operator bool() noexcept(false) { - return get_bool(); +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; } -#endif -simdjson_inline simdjson_result value::begin() & noexcept { - return get_array().begin(); -} -simdjson_inline simdjson_result value::end() & noexcept { - return {}; -} -simdjson_inline simdjson_result value::count_elements() & noexcept { - simdjson_result answer; - auto a = get_array(); - answer = a.count_elements(); - // count_elements leaves you pointing inside the array, at the first element. - // We need to move back so that the user can create a new array (which requires that - // we point at '['). - iter.move_at_start(); - return answer; -} -simdjson_inline simdjson_result value::count_fields() & noexcept { - simdjson_result answer; - auto a = get_object(); - answer = a.count_fields(); - iter.move_at_start(); - return answer; -} -simdjson_inline simdjson_result value::at(size_t index) noexcept { - auto a = get_array(); - return a.at(index); -} -simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { - return start_or_resume_object().find_field(key); -} -simdjson_inline simdjson_result value::find_field(const char *key) noexcept { - return start_or_resume_object().find_field(key); +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } -simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { - return start_or_resume_object().find_field_unordered(key); -} -simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { - return start_or_resume_object().find_field_unordered(key); +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters("), and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; } -simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { - return start_or_resume_object()[key]; -} -simdjson_inline simdjson_result value::operator[](const char *key) noexcept { - return start_or_resume_object()[key]; +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; } -simdjson_inline simdjson_result value::type() noexcept { - return iter.type(); -} -simdjson_inline simdjson_result value::is_scalar() noexcept { - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return ! ((this_type == json_type::array) || (this_type == json_type::object)); +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; } -simdjson_inline bool value::is_negative() noexcept { - return iter.is_negative(); +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; } -simdjson_inline simdjson_result value::is_integer() noexcept { - return iter.is_integer(); -} -simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { - return iter.get_number_type(); +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); } -simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { - return iter.get_number(); + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; } -simdjson_inline std::string_view value::raw_json_token() noexcept { - return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); } -simdjson_inline simdjson_result value::raw_json() noexcept { - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: { - ondemand::array array; - SIMDJSON_TRY(get_array().get(array)); - return array.raw_json(); - } - case json_type::object: { - ondemand::object object; - SIMDJSON_TRY(get_object().get(object)); - return object.raw_json(); - } - default: - return raw_json_token(); - } +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); } -simdjson_inline simdjson_result value::current_location() noexcept { - return iter.json_iter().current_location(); + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); } -simdjson_inline int32_t value::current_depth() const noexcept{ - return iter.json_iter().depth(); +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); } -simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: - return (*this).get_array().at_pointer(json_pointer); - case json_type::object: - return (*this).get_object().at_pointer(json_pointer); - default: - return INVALID_JSON_POINTER; +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; } } } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - westmere::ondemand::value &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ -} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { if (error()) { return error(); } - return first.count_elements(); + return first.raw(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(lasx::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { if (error()) { return error(); } - return first.count_fields(); + return first.unescape(iter, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(lasx::ondemand::json_iterator &iter) const noexcept { if (error()) { return error(); } - return first.at(index); + return first.unescape_wobbly(iter); } -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for lasx */ +/* including simdjson/generic/ondemand/serialization-inl.h for lasx: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { - if (error()) { return error(); } - return {}; + + +inline simdjson_result to_json_string(lasx::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { - if (error()) { return error(); } - return first.find_field(key); +inline simdjson_result to_json_string(lasx::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { - if (error()) { return error(); } - return first.find_field(key); + +inline simdjson_result to_json_string(lasx::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. + */ + using namespace lasx::ondemand; + lasx::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + lasx::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + lasx::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); +inline simdjson_result to_json_string(lasx::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); + +inline simdjson_result to_json_string(lasx::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { - if (error()) { return error(); } - return first[key]; +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { - if (error()) { return error(); } - return first[key]; + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result simdjson_result::get_array() noexcept { - if (error()) { return error(); } - return first.get_array(); +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result simdjson_result::get_object() noexcept { - if (error()) { return error(); } - return first.get_object(); + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return error(); } - return first.get_uint64(); + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { - if (error()) { return error(); } - return first.get_uint64_in_string(); +} // namespace simdjson + +namespace simdjson { namespace lasx { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { - if (error()) { return error(); } - return first.get_int64_in_string(); +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { - if (error()) { return error(); } - return first.get_double_in_string(); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(allow_replacement); +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } -template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(receiver, allow_replacement); +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { - if (error()) { return error(); } - return first.get_wobbly_string(); +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { - if (error()) { return error(); } - return first.is_null(); +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } +#endif -template simdjson_inline simdjson_result simdjson_result::get() noexcept { - if (error()) { return error(); } - return first.get(); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } -template simdjson_inline error_code simdjson_result::get(T &out) noexcept { - if (error()) { return error(); } - return first.get(out); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } +#endif +}}} // namespace simdjson::lasx::ondemand -template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { - if (error()) { return error(); } - return std::move(first); +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for lasx */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for lasx: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ } -template<> simdjson_inline error_code simdjson_result::get(westmere::ondemand::value &out) noexcept { - if (error()) { return error(); } - out = first; - return SUCCESS; + +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); } -simdjson_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; } -simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); + +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? + *(position+1) - *(position) + : *(position+2) - *(position); } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; } -#if SIMDJSON_EXCEPTIONS -simdjson_inline simdjson_result::operator westmere::ondemand::array() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); } -simdjson_inline simdjson_result::operator westmere::ondemand::object() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; } -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; } -simdjson_inline simdjson_result::operator westmere::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; } -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; } -#endif - -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; } - -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { - if (error()) { return error(); } - return first.raw_json(); +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; } -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); -} +} // namespace ondemand +} // namespace lasx +} // namespace simdjson -simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { - if (error()) { return error(); } - return first.current_depth(); -} +namespace simdjson { -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); -} +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H -/* end file simdjson/generic/ondemand/value-inl.h for westmere */ -/* including simdjson/generic/ondemand/value_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/value_iterator-inl.h for westmere */ +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/token_iterator-inl.h for lasx */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for lasx: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for lasx */ #ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -87478,13 +122247,13 @@ simdjson_inline simdjson_result simdjson_result value_iterator::find_ } else if (!is_open()) { #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. - // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif @@ -87721,7 +122490,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_ #if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. - // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif @@ -88063,7 +122832,7 @@ simdjson_inline simdjson_result value_iterator::get_number() noexcept { } simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("is_root_integer"); uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -88078,8 +122847,8 @@ simdjson_inline simdjson_result value_iterator::is_root_integer(bool check return answer; } -simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { - auto max_len = peek_start_length(); +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); auto json = peek_root_scalar("number"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest @@ -88087,7 +122856,12 @@ simdjson_inline simdjson_result value_iterator::get_root_ uint8_t tmpbuf[1074+8+1+1]; tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); return NUMBER_ERROR; } auto answer = numberparsing::get_number_type(tmpbuf); @@ -88095,15 +122869,21 @@ simdjson_inline simdjson_result value_iterator::get_root_ return answer; } simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("number"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. uint8_t tmpbuf[1074+8+1+1]; tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; + } + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); return NUMBER_ERROR; } number num; @@ -88135,7 +122915,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iter return raw_json_string(json+1); } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("uint64"); uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -88151,7 +122931,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::g return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("uint64"); uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -88167,7 +122947,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::g return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -88184,7 +122964,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::ge return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. @@ -88201,7 +122981,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::ge return result; } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest @@ -88221,7 +123001,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest @@ -88239,27 +123019,33 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { - auto max_len = peek_start_length(); + auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); if(result) { // we have something that looks like a null. if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("null"); + } else if (json[0] == 'n') { + return incorrect_type_error("Not a null but starts with n"); } return result; } @@ -88325,6 +123111,9 @@ simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { return _json_iter->peek_length(start_position()); } +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); +} simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); @@ -88534,37 +123323,36 @@ simdjson_inline error_code value_iterator::report_error(error_code error, const } } // namespace ondemand -} // namespace westmere +} // namespace lasx } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::value_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/value_iterator-inl.h for westmere */ -/* end file simdjson/generic/ondemand/amalgamated.h for westmere */ -/* including simdjson/westmere/end.h: #include "simdjson/westmere/end.h" */ -/* begin file simdjson/westmere/end.h */ +/* end file simdjson/generic/ondemand/value_iterator-inl.h for lasx */ + + +/* end file simdjson/generic/ondemand/amalgamated.h for lasx */ +/* including simdjson/lasx/end.h: #include "simdjson/lasx/end.h" */ +/* begin file simdjson/lasx/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE -SIMDJSON_UNTARGET_REGION -#endif - -/* undefining SIMDJSON_IMPLEMENTATION from "westmere" */ +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "lasx" */ #undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/westmere/end.h */ +/* end file simdjson/lasx/end.h */ -#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H -/* end file simdjson/westmere/ondemand.h */ +#endif // SIMDJSON_LASX_ONDEMAND_H +/* end file simdjson/lasx/ondemand.h */ #else #error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION #endif @@ -88591,6 +123379,5 @@ namespace simdjson { #endif // SIMDJSON_ONDEMAND_H /* end file simdjson/ondemand.h */ - #endif // SIMDJSON_H /* end file simdjson.h */ diff --git a/tests/test_minefield.py b/tests/test_minefield.py index e9d71e3..754e06b 100644 --- a/tests/test_minefield.py +++ b/tests/test_minefield.py @@ -10,7 +10,7 @@ def test_parsing(parser, parsing_tests): for file in files: try: parser.load(file) - except ValueError: + except (ValueError, RuntimeError): # The source document was not expected to fail. if expected_result == 'y': raise